All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Another approach to large transactions
@ 2015-04-16 23:17 Stefan Beller
  2015-04-16 23:17 ` [PATCH 1/3] refs.c: remove lock_fd from struct ref_lock Stefan Beller
                   ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-16 23:17 UTC (permalink / raw)
  To: gitster, mhagger; +Cc: git, Stefan Beller

* We keep the speed on small transactions 
  (no close and reopen of fds in small transactions)

* No refactoring for refs included, only minimally invasive to the refs.c code

* applies on top of origin/sb/remove-fd-from-ref-lock replacing the last
  commit there (I reworded the commit message of the last patch of that tip,
  being the first patch in this series)
  
* another approach would be to move the fd counting into the lock file api,
  I think that's not worth it for now.


Stefan Beller (3):
  refs.c: remove lock_fd from struct ref_lock
  Move unsigned int get_max_fd_limit(void) to git_compat_util.h
  refs.c: enable large transactions

 git-compat-util.h     |  1 +
 refs.c                | 28 ++++++++++++++++++----------
 sha1_file.c           | 41 -----------------------------------------
 t/t1400-update-ref.sh |  4 ++--
 wrapper.c             | 41 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 62 insertions(+), 53 deletions(-)

-- 
2.3.0.81.gc37f363

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/3] refs.c: remove lock_fd from struct ref_lock
  2015-04-16 23:17 [PATCH 0/3] Another approach to large transactions Stefan Beller
@ 2015-04-16 23:17 ` Stefan Beller
  2015-04-16 23:17 ` [PATCH 2/3] Move get_max_fd_limit(void) to git_compat_util.h Stefan Beller
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-16 23:17 UTC (permalink / raw)
  To: gitster, mhagger; +Cc: git, Stefan Beller

The 'lock_fd' is the same as 'lk->fd'. No need to store it twice so remove
it.

No functional changes intended.

Signed-off-by: Stefan Beller <sbeller@google.com>
Reviewed-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 refs.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/refs.c b/refs.c
index 47e4e53..4f495bd 100644
--- a/refs.c
+++ b/refs.c
@@ -11,7 +11,6 @@ struct ref_lock {
 	char *orig_ref_name;
 	struct lock_file *lk;
 	unsigned char old_sha1[20];
-	int lock_fd;
 };
 
 /*
@@ -2284,7 +2283,6 @@ static struct ref_lock *lock_ref_sha1_basic(const char *refname,
 	int attempts_remaining = 3;
 
 	lock = xcalloc(1, sizeof(struct ref_lock));
-	lock->lock_fd = -1;
 
 	if (mustexist)
 		resolve_flags |= RESOLVE_REF_READING;
@@ -2356,8 +2354,7 @@ static struct ref_lock *lock_ref_sha1_basic(const char *refname,
 		goto error_return;
 	}
 
-	lock->lock_fd = hold_lock_file_for_update(lock->lk, ref_file, lflags);
-	if (lock->lock_fd < 0) {
+	if (hold_lock_file_for_update(lock->lk, ref_file, lflags) < 0) {
 		last_errno = errno;
 		if (errno == ENOENT && --attempts_remaining > 0)
 			/*
@@ -2868,7 +2865,6 @@ static int close_ref(struct ref_lock *lock)
 {
 	if (close_lock_file(lock->lk))
 		return -1;
-	lock->lock_fd = -1;
 	return 0;
 }
 
@@ -2876,7 +2872,6 @@ static int commit_ref(struct ref_lock *lock)
 {
 	if (commit_lock_file(lock->lk))
 		return -1;
-	lock->lock_fd = -1;
 	return 0;
 }
 
@@ -3046,8 +3041,8 @@ static int write_ref_sha1(struct ref_lock *lock,
 		errno = EINVAL;
 		return -1;
 	}
-	if (write_in_full(lock->lock_fd, sha1_to_hex(sha1), 40) != 40 ||
-	    write_in_full(lock->lock_fd, &term, 1) != 1 ||
+	if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
+	    write_in_full(lock->lk->fd, &term, 1) != 1 ||
 	    close_ref(lock) < 0) {
 		int save_errno = errno;
 		error("Couldn't write %s", lock->lk->filename.buf);
@@ -4084,9 +4079,9 @@ int reflog_expire(const char *refname, const unsigned char *sha1,
 			status |= error("couldn't write %s: %s", log_file,
 					strerror(errno));
 		} else if (update &&
-			(write_in_full(lock->lock_fd,
+			   (write_in_full(lock->lk->fd,
 				sha1_to_hex(cb.last_kept_sha1), 40) != 40 ||
-			 write_str_in_full(lock->lock_fd, "\n") != 1 ||
+			 write_str_in_full(lock->lk->fd, "\n") != 1 ||
 			 close_ref(lock) < 0)) {
 			status |= error("couldn't write %s",
 					lock->lk->filename.buf);
-- 
2.3.0.81.gc37f363

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/3] Move get_max_fd_limit(void) to git_compat_util.h
  2015-04-16 23:17 [PATCH 0/3] Another approach to large transactions Stefan Beller
  2015-04-16 23:17 ` [PATCH 1/3] refs.c: remove lock_fd from struct ref_lock Stefan Beller
@ 2015-04-16 23:17 ` Stefan Beller
  2015-04-16 23:17 ` [PATCH 3/3] refs.c: enable large transactions Stefan Beller
  2015-04-17 17:09 ` [PATCH 0/3] Another approach to " Junio C Hamano
  3 siblings, 0 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-16 23:17 UTC (permalink / raw)
  To: gitster, mhagger; +Cc: git, Stefan Beller

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 git-compat-util.h |  1 +
 sha1_file.c       | 41 -----------------------------------------
 wrapper.c         | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index bc8fc8c..2c55ca7 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -675,6 +675,7 @@ extern int xmkstemp_mode(char *template, int mode);
 extern int odb_mkstemp(char *template, size_t limit, const char *pattern);
 extern int odb_pack_keep(char *name, size_t namesz, const unsigned char *sha1);
 extern char *xgetcwd(void);
+extern unsigned int get_max_fd_limit(void);
 
 #define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), (alloc) * sizeof(*(x)))
 
diff --git a/sha1_file.c b/sha1_file.c
index 88f06ba..1f2519c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -870,47 +870,6 @@ void free_pack_by_name(const char *pack_name)
 	}
 }
 
-static unsigned int get_max_fd_limit(void)
-{
-#ifdef RLIMIT_NOFILE
-	{
-		struct rlimit lim;
-
-		if (!getrlimit(RLIMIT_NOFILE, &lim))
-			return lim.rlim_cur;
-	}
-#endif
-
-#ifdef _SC_OPEN_MAX
-	{
-		long open_max = sysconf(_SC_OPEN_MAX);
-		if (0 < open_max)
-			return open_max;
-		/*
-		 * Otherwise, we got -1 for one of the two
-		 * reasons:
-		 *
-		 * (1) sysconf() did not understand _SC_OPEN_MAX
-		 *     and signaled an error with -1; or
-		 * (2) sysconf() said there is no limit.
-		 *
-		 * We _could_ clear errno before calling sysconf() to
-		 * tell these two cases apart and return a huge number
-		 * in the latter case to let the caller cap it to a
-		 * value that is not so selfish, but letting the
-		 * fallback OPEN_MAX codepath take care of these cases
-		 * is a lot simpler.
-		 */
-	}
-#endif
-
-#ifdef OPEN_MAX
-	return OPEN_MAX;
-#else
-	return 1; /* see the caller ;-) */
-#endif
-}
-
 /*
  * Do not call this directly as this leaks p->pack_fd on error return;
  * call open_packed_git() instead.
diff --git a/wrapper.c b/wrapper.c
index d5a6cef..493bf6f 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -303,6 +303,47 @@ ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
 	return total;
 }
 
+unsigned int get_max_fd_limit(void)
+{
+#ifdef RLIMIT_NOFILE
+	{
+		struct rlimit lim;
+
+		if (!getrlimit(RLIMIT_NOFILE, &lim))
+			return lim.rlim_cur;
+	}
+#endif
+
+#ifdef _SC_OPEN_MAX
+	{
+		long open_max = sysconf(_SC_OPEN_MAX);
+		if (0 < open_max)
+			return open_max;
+		/*
+		 * Otherwise, we got -1 for one of the two
+		 * reasons:
+		 *
+		 * (1) sysconf() did not understand _SC_OPEN_MAX
+		 *     and signaled an error with -1; or
+		 * (2) sysconf() said there is no limit.
+		 *
+		 * We _could_ clear errno before calling sysconf() to
+		 * tell these two cases apart and return a huge number
+		 * in the latter case to let the caller cap it to a
+		 * value that is not so selfish, but letting the
+		 * fallback OPEN_MAX codepath take care of these cases
+		 * is a lot simpler.
+		 */
+	}
+#endif
+
+#ifdef OPEN_MAX
+	return OPEN_MAX;
+#else
+	return 1; /* see the caller ;-) */
+#endif
+}
+
 int xdup(int fd)
 {
 	int ret = dup(fd);
-- 
2.3.0.81.gc37f363

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/3] refs.c: enable large transactions
  2015-04-16 23:17 [PATCH 0/3] Another approach to large transactions Stefan Beller
  2015-04-16 23:17 ` [PATCH 1/3] refs.c: remove lock_fd from struct ref_lock Stefan Beller
  2015-04-16 23:17 ` [PATCH 2/3] Move get_max_fd_limit(void) to git_compat_util.h Stefan Beller
@ 2015-04-16 23:17 ` Stefan Beller
  2015-04-17 17:09 ` [PATCH 0/3] Another approach to " Junio C Hamano
  3 siblings, 0 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-16 23:17 UTC (permalink / raw)
  To: gitster, mhagger; +Cc: git, Stefan Beller

This is another attempt on enabling large transactions
(large in terms of open file descriptors). We keep track of how many
lock files are opened by the ref_transaction_commit function.
When more than a reasonable amount of files is open, we close
the file descriptors to make sure the transaction can continue.

Another idea I had during implementing this was to move this file
closing into the lock file API, such that only a certain amount of
lock files can be open at any given point in time and we'd be 'garbage
collecting' open fds when necessary in any relevant call to the lock
file API. This would have brought the advantage of having such
functionality available in other users of the lock file API as well.
The downside however is the over complication, you really need to always
check for (lock->fd != -1) all the time, which may slow down other parts
of the code, which did not ask for such a feature.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 refs.c                | 13 +++++++++++++
 t/t1400-update-ref.sh |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/refs.c b/refs.c
index 4f495bd..44e0b15 100644
--- a/refs.c
+++ b/refs.c
@@ -3041,6 +3041,8 @@ static int write_ref_sha1(struct ref_lock *lock,
 		errno = EINVAL;
 		return -1;
 	}
+	if (lock->lk->fd == -1)
+		reopen_lock_file(lock->lk);
 	if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
 	    write_in_full(lock->lk->fd, &term, 1) != 1 ||
 	    close_ref(lock) < 0) {
@@ -3719,6 +3721,12 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 {
 	int ret = 0, i;
 	int n = transaction->nr;
+	/*
+	 * We may want to open many files in a large transaction, so come up with
+	 * a reasonable maximum, keep some spares for stdin/out and other open
+	 * files.
+	 */
+	int remaining_fds = get_max_fd_limit() - 8;
 	struct ref_update **updates = transaction->updates;
 	struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
 	struct string_list_item *ref_to_delete;
@@ -3762,6 +3770,11 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 				    update->refname);
 			goto cleanup;
 		}
+		if (remaining_fds > 0) {
+			remaining_fds--;
+		} else {
+			close_lock_file(update->lock->lk);
+		}
 	}
 
 	/* Perform updates first so live commits remain referenced */
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index 7a69f1a..636d3a1 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
 
 test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
@@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
 )
 '
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
-- 
2.3.0.81.gc37f363

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-16 23:17 [PATCH 0/3] Another approach to large transactions Stefan Beller
                   ` (2 preceding siblings ...)
  2015-04-16 23:17 ` [PATCH 3/3] refs.c: enable large transactions Stefan Beller
@ 2015-04-17 17:09 ` Junio C Hamano
  2015-04-17 22:12   ` Junio C Hamano
  3 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2015-04-17 17:09 UTC (permalink / raw)
  To: Stefan Beller; +Cc: mhagger, git

Stefan Beller <sbeller@google.com> writes:

> * We keep the speed on small transactions 
>   (no close and reopen of fds in small transactions)
>
> * No refactoring for refs included, only minimally invasive to the refs.c code
>
> * applies on top of origin/sb/remove-fd-from-ref-lock replacing the last
>   commit there (I reworded the commit message of the last patch of that tip,
>   being the first patch in this series)
>   
> * another approach would be to move the fd counting into the lock file api,
>   I think that's not worth it for now.

I agree that it is a good direction to go to limit the number of
open file descriptors.  Overall it looked good to me.


>
>
> Stefan Beller (3):
>   refs.c: remove lock_fd from struct ref_lock
>   Move unsigned int get_max_fd_limit(void) to git_compat_util.h
>   refs.c: enable large transactions
>
>  git-compat-util.h     |  1 +
>  refs.c                | 28 ++++++++++++++++++----------
>  sha1_file.c           | 41 -----------------------------------------
>  t/t1400-update-ref.sh |  4 ++--
>  wrapper.c             | 41 +++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 62 insertions(+), 53 deletions(-)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-17 17:09 ` [PATCH 0/3] Another approach to " Junio C Hamano
@ 2015-04-17 22:12   ` Junio C Hamano
  2015-04-17 22:17     ` Stefan Beller
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2015-04-17 22:12 UTC (permalink / raw)
  To: Stefan Beller; +Cc: mhagger, git

Junio C Hamano <gitster@pobox.com> writes:

> Stefan Beller <sbeller@google.com> writes:
>
>> * We keep the speed on small transactions 
>>   (no close and reopen of fds in small transactions)
>>
>> * No refactoring for refs included, only minimally invasive to the refs.c code
>>
>> * applies on top of origin/sb/remove-fd-from-ref-lock replacing the last
>>   commit there (I reworded the commit message of the last patch of that tip,
>>   being the first patch in this series)
>>   
>> * another approach would be to move the fd counting into the lock file api,
>>   I think that's not worth it for now.
>
> I agree that it is a good direction to go to limit the number of
> open file descriptors.  Overall it looked good to me.

This is now pushed out and sitting at the tip of 'pu'.  It seems to
break one of the tests in 1400 when merged to 'next', but I didn't
look it closely.

Thanks.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-17 22:12   ` Junio C Hamano
@ 2015-04-17 22:17     ` Stefan Beller
  2015-04-17 23:31       ` Stefan Beller
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-17 22:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Fri, Apr 17, 2015 at 3:12 PM, Junio C Hamano <gitster@pobox.com> wrote:
>
> This is now pushed out and sitting at the tip of 'pu'.  It seems to
> break one of the tests in 1400 when merged to 'next', but I didn't
> look it closely.
>
> Thanks.

ok, I'll look more closely.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-17 22:17     ` Stefan Beller
@ 2015-04-17 23:31       ` Stefan Beller
  2015-04-20 22:26         ` Stefan Beller
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-17 23:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Fri, Apr 17, 2015 at 3:17 PM, Stefan Beller <sbeller@google.com> wrote:
> On Fri, Apr 17, 2015 at 3:12 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>
>> This is now pushed out and sitting at the tip of 'pu'.  It seems to
>> break one of the tests in 1400 when merged to 'next', but I didn't
>> look it closely.
>>
>> Thanks.
>
> ok, I'll look more closely.

Apparently I screwed up even before sending the patches over the wire.

  not ok 144 - large transaction deleting branches does not burst open
file limit

fails in my local branch as well as origin/pu as well on
origin/sb/remove-fd-from-ref-lock

So there is a pretty strong argument, the code is only improving
large transaction creating branches and not deleting branches.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-17 23:31       ` Stefan Beller
@ 2015-04-20 22:26         ` Stefan Beller
  2015-04-20 22:51           ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-20 22:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Fri, Apr 17, 2015 at 4:31 PM, Stefan Beller <sbeller@google.com> wrote:
> On Fri, Apr 17, 2015 at 3:17 PM, Stefan Beller <sbeller@google.com> wrote:
>> On Fri, Apr 17, 2015 at 3:12 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>>
>>> This is now pushed out and sitting at the tip of 'pu'.  It seems to
>>> break one of the tests in 1400 when merged to 'next', but I didn't
>>> look it closely.
>>>
>>> Thanks.
>>
>> ok, I'll look more closely.
>
> Apparently I screwed up even before sending the patches over the wire.

For the deleting refs test failing:
The problem comes from guessing the number of fds we're allowed to use.
At first I thought it was a fundamental issue with the code being broken, but
it turns out we just need a larger offset as we apparently have 9 files open
already, before the transaction even starts.
I did not expect the number to be that high, which is why I came up with the
arbitrary number of 8 (3 for stdin/out/err, maybe packed refs and reflog so I
guessed, 8 would do fine).

I am not sure if the 9 is a constant or if it scales to some unknown
property yet.
So to make the series work, all we need is:

- int remaining_fds = get_max_fd_limit() - 8;
+ int remaining_fds = get_max_fd_limit() - 9;

I am going to try to understand where the 9 comes from and resend the patches.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-20 22:26         ` Stefan Beller
@ 2015-04-20 22:51           ` Junio C Hamano
  2015-04-20 23:07             ` Stefan Beller
  2015-04-21 12:37             ` Michael Haggerty
  0 siblings, 2 replies; 28+ messages in thread
From: Junio C Hamano @ 2015-04-20 22:51 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Michael Haggerty, git

Stefan Beller <sbeller@google.com> writes:

> The problem comes from guessing the number of fds we're allowed to use.
> At first I thought it was a fundamental issue with the code being broken, but
> it turns out we just need a larger offset as we apparently have 9 files open
> already, before the transaction even starts.
> I did not expect the number to be that high, which is why I came up with the
> arbitrary number of 8 (3 for stdin/out/err, maybe packed refs and reflog so I
> guessed, 8 would do fine).
>
> I am not sure if the 9 is a constant or if it scales to some unknown
> property yet.
> So to make the series work, all we need is:
>
> - int remaining_fds = get_max_fd_limit() - 8;
> + int remaining_fds = get_max_fd_limit() - 9;
>
> I am going to try to understand where the 9 comes from and resend the patches.

I have a suspicion that the above is an indication that the approach
is fundamentally not sound.  9 may be OK in your test repository,
but that may fail in a repository with different resource usage
patterns.

On the core management side, xmalloc() and friends retry upon
failure, after attempting to free the resource.  I wonder if your
codepath can do something similar to that, perhaps?

On the other hand, it may be that this "let's keep it open as long
as possible, as creat-close-open-write-close is more expensive" may
not be worth the complexity.  I wonder if it might not be a bad idea
to start with a simpler rule, e.g. "use creat-write-close for ref
updates outside transactions, and creat-close-open-write-close for
inside transactions, as that is likely to be multi-ref updates" or
something stupid and simple like that?

Michael?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-20 22:51           ` Junio C Hamano
@ 2015-04-20 23:07             ` Stefan Beller
  2015-04-21  0:31               ` Stefan Beller
  2015-04-21 17:19               ` Junio C Hamano
  2015-04-21 12:37             ` Michael Haggerty
  1 sibling, 2 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-20 23:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Mon, Apr 20, 2015 at 3:51 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> The problem comes from guessing the number of fds we're allowed to use.
>> At first I thought it was a fundamental issue with the code being broken, but
>> it turns out we just need a larger offset as we apparently have 9 files open
>> already, before the transaction even starts.
>> I did not expect the number to be that high, which is why I came up with the
>> arbitrary number of 8 (3 for stdin/out/err, maybe packed refs and reflog so I
>> guessed, 8 would do fine).
>>
>> I am not sure if the 9 is a constant or if it scales to some unknown
>> property yet.
>> So to make the series work, all we need is:
>>
>> - int remaining_fds = get_max_fd_limit() - 8;
>> + int remaining_fds = get_max_fd_limit() - 9;
>>
>> I am going to try to understand where the 9 comes from and resend the patches.
>
> I have a suspicion that the above is an indication that the approach
> is fundamentally not sound.  9 may be OK in your test repository,
> but that may fail in a repository with different resource usage
> patterns.

You put my concerns in a better wording.

>
> On the core management side, xmalloc() and friends retry upon
> failure, after attempting to free the resource.  I wonder if your
> codepath can do something similar to that, perhaps?

But then we'd need to think about which fds can be 'garbage collected'.
The lock files certainly can be closed and reopened. The first 3 fd not so.
So we'd need to maintain a data structure of file descriptors good/bad
for this reclaiming.

>
> On the other hand, it may be that this "let's keep it open as long
> as possible, as creat-close-open-write-close is more expensive" may
> not be worth the complexity.  I wonder if it might not be a bad idea
> to start with a simpler rule, e.g. "use creat-write-close for ref
> updates outside transactions, and creat-close-open-write-close for
> inside transactions, as that is likely to be multi-ref updates" or
> something stupid and simple like that?

I thought about any ref about goes through transaction nowadays.
Having the current patches the first n locks are creat-write-close,
while the remaining locks have the  creat-close-open-write-close
pattern, so it slows only the large transactions.

My plan is to strace all open calls and check if the aforementioned
9 open files are just a constant.

>
> Michael?
>
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-20 23:07             ` Stefan Beller
@ 2015-04-21  0:31               ` Stefan Beller
  2015-04-21  0:35                 ` [PATCH] refs.c: enable " Stefan Beller
  2015-04-21 23:21                 ` [PATCH 0/3] Another approach to " Jeff King
  2015-04-21 17:19               ` Junio C Hamano
  1 sibling, 2 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-21  0:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Mon, Apr 20, 2015 at 4:07 PM, Stefan Beller <sbeller@google.com> wrote:
> On Mon, Apr 20, 2015 at 3:51 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Stefan Beller <sbeller@google.com> writes:
>>
>>> The problem comes from guessing the number of fds we're allowed to use.
>>> At first I thought it was a fundamental issue with the code being broken, but
>>> it turns out we just need a larger offset as we apparently have 9 files open
>>> already, before the transaction even starts.
>>> I did not expect the number to be that high, which is why I came up with the
>>> arbitrary number of 8 (3 for stdin/out/err, maybe packed refs and reflog so I
>>> guessed, 8 would do fine).
>>>
>>> I am not sure if the 9 is a constant or if it scales to some unknown
>>> property yet.
>>> So to make the series work, all we need is:
>>>
>>> - int remaining_fds = get_max_fd_limit() - 8;
>>> + int remaining_fds = get_max_fd_limit() - 9;
>>>
>>> I am going to try to understand where the 9 comes from and resend the patches.
>>
>> I have a suspicion that the above is an indication that the approach
>> is fundamentally not sound.  9 may be OK in your test repository,
>> but that may fail in a repository with different resource usage
>> patterns.
>
> You put my concerns in a better wording.
>
>>
>> On the core management side, xmalloc() and friends retry upon
>> failure, after attempting to free the resource.  I wonder if your
>> codepath can do something similar to that, perhaps?
>
> But then we'd need to think about which fds can be 'garbage collected'.
> The lock files certainly can be closed and reopened. The first 3 fd not so.
> So we'd need to maintain a data structure of file descriptors good/bad
> for this reclaiming.
>
>>
>> On the other hand, it may be that this "let's keep it open as long
>> as possible, as creat-close-open-write-close is more expensive" may
>> not be worth the complexity.  I wonder if it might not be a bad idea
>> to start with a simpler rule, e.g. "use creat-write-close for ref
>> updates outside transactions, and creat-close-open-write-close for
>> inside transactions, as that is likely to be multi-ref updates" or
>> something stupid and simple like that?
>
> I thought about any ref about goes through transaction nowadays.
> Having the current patches the first n locks are creat-write-close,
> while the remaining locks have the  creat-close-open-write-close
> pattern, so it slows only the large transactions.
>
> My plan is to strace all open calls and check if the aforementioned
> 9 open files are just a constant.

When running the test locally, i.e. not in the test suite, but typing
the commands
myself into the shell, Git is fine with having just 5 file descriptors left.
The additional 4 required fds come from beign run inside the test suite.

When strace-ing git, I cannot see any possible other fds which would require
having some left over space required. So I'd propose we'd just take a reasonable
number not too small for various test setups like 32 and then go with the
proposed patches.

I'll just resend the patches to have a new basis for discussion.

>
>>
>> Michael?
>>
>>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH] refs.c: enable large transactions
  2015-04-21  0:31               ` Stefan Beller
@ 2015-04-21  0:35                 ` Stefan Beller
  2015-04-21 17:16                   ` Junio C Hamano
  2015-04-21 17:22                   ` [PATCH] " Junio C Hamano
  2015-04-21 23:21                 ` [PATCH 0/3] Another approach to " Jeff King
  1 sibling, 2 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-21  0:35 UTC (permalink / raw)
  To: gitster, mhagger, git; +Cc: Stefan Beller

This is another attempt on enabling large transactions
(large in terms of open file descriptors). We keep track of how many
lock files are opened by the ref_transaction_commit function.
When more than a reasonable amount of files is open, we close
the file descriptors to make sure the transaction can continue.

Another idea I had during implementing this was to move this file
closing into the lock file API, such that only a certain amount of
lock files can be open at any given point in time and we'd be 'garbage
collecting' open fds when necessary in any relevant call to the lock
file API. This would have brought the advantage of having such
functionality available in other users of the lock file API as well.
The downside however is the over complication, you really need to always
check for (lock->fd != -1) all the time, which may slow down other parts
of the code, which did not ask for such a feature.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

This replaces the latest patch on origin/sb/remove-fd-from-ref-lock
The test suite passes now

 refs.c                | 13 +++++++++++++
 t/t1400-update-ref.sh |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/refs.c b/refs.c
index 4f495bd..1e8cb16 100644
--- a/refs.c
+++ b/refs.c
@@ -3041,6 +3041,8 @@ static int write_ref_sha1(struct ref_lock *lock,
 		errno = EINVAL;
 		return -1;
 	}
+	if (lock->lk->fd == -1)
+		reopen_lock_file(lock->lk);
 	if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
 	    write_in_full(lock->lk->fd, &term, 1) != 1 ||
 	    close_ref(lock) < 0) {
@@ -3719,6 +3721,12 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 {
 	int ret = 0, i;
 	int n = transaction->nr;
+	/*
+	 * We may want to open many files in a large transaction, so come up with
+	 * a reasonable maximum, keep some spares for stdin/out and other open
+	 * files.
+	 */
+	int remaining_fds = get_max_fd_limit() - 32;
 	struct ref_update **updates = transaction->updates;
 	struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
 	struct string_list_item *ref_to_delete;
@@ -3762,6 +3770,11 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 				    update->refname);
 			goto cleanup;
 		}
+		if (remaining_fds > 0) {
+			remaining_fds--;
+		} else {
+			close_lock_file(update->lock->lk);
+		}
 	}
 
 	/* Perform updates first so live commits remain referenced */
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index 7a69f1a..636d3a1 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
 
 test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
@@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
 )
 '
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
-- 
2.4.0.rc2.5.g4c2045b.dirty

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-20 22:51           ` Junio C Hamano
  2015-04-20 23:07             ` Stefan Beller
@ 2015-04-21 12:37             ` Michael Haggerty
  1 sibling, 0 replies; 28+ messages in thread
From: Michael Haggerty @ 2015-04-21 12:37 UTC (permalink / raw)
  To: Junio C Hamano, Stefan Beller; +Cc: git

On 04/21/2015 12:51 AM, Junio C Hamano wrote:
> Stefan Beller <sbeller@google.com> writes:
> 
>> The problem comes from guessing the number of fds we're allowed to use.
>> At first I thought it was a fundamental issue with the code being broken, but
>> it turns out we just need a larger offset as we apparently have 9 files open
>> already, before the transaction even starts.
>> I did not expect the number to be that high, which is why I came up with the
>> arbitrary number of 8 (3 for stdin/out/err, maybe packed refs and reflog so I
>> guessed, 8 would do fine).
>>
>> I am not sure if the 9 is a constant or if it scales to some unknown
>> property yet.
>> So to make the series work, all we need is:
>>
>> - int remaining_fds = get_max_fd_limit() - 8;
>> + int remaining_fds = get_max_fd_limit() - 9;
>>
>> I am going to try to understand where the 9 comes from and resend the patches.
> 
> I have a suspicion that the above is an indication that the approach
> is fundamentally not sound.  9 may be OK in your test repository,
> but that may fail in a repository with different resource usage
> patterns.
> 
> On the core management side, xmalloc() and friends retry upon
> failure, after attempting to free the resource.  I wonder if your
> codepath can do something similar to that, perhaps?
> 
> On the other hand, it may be that this "let's keep it open as long
> as possible, as creat-close-open-write-close is more expensive" may
> not be worth the complexity.  I wonder if it might not be a bad idea
> to start with a simpler rule, e.g. "use creat-write-close for ref
> updates outside transactions, and creat-close-open-write-close for
> inside transactions, as that is likely to be multi-ref updates" or
> something stupid and simple like that?
> 
> Michael?

Given that the release is so close, I think we should use the simplest
thing that could work, which I think is Stefan's original
N*(creat-close),N*(open-write-close),N*rename patch. I don't think there
are many code paths that might build up a big transaction anyway (I
guess only "git update-ref --stdin" and "git push --atomic"?) Neither of
these has been around very long, so I don't think the small performance
hit will bother anybody.

The correct solution is clearly N*(creat-write-close),N*rename, but that
is too complicated for this release. So let's get the bug fixed for the
release and try to get the better fix in the next release.

It would be possible to optimize the N=1 case (I guess it's by far the
most common case) really stupidly using something like

        if (n > 1)
                close_lock_file(update->lock->lk);

but I doubt even that's worth it.

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] refs.c: enable large transactions
  2015-04-21  0:35                 ` [PATCH] refs.c: enable " Stefan Beller
@ 2015-04-21 17:16                   ` Junio C Hamano
  2015-04-21 17:24                     ` Stefan Beller
  2015-04-21 17:22                   ` [PATCH] " Junio C Hamano
  1 sibling, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2015-04-21 17:16 UTC (permalink / raw)
  To: Stefan Beller; +Cc: mhagger, git

Stefan Beller <sbeller@google.com> writes:

> +	/*
> +	 * We may want to open many files in a large transaction, so come up with
> +	 * a reasonable maximum, keep some spares for stdin/out and other open
> +	 * files.
> +	 */
> +	int remaining_fds = get_max_fd_limit() - 32;

Can this go negative?  If it does so, does it matter?  I think the
code doesn't barf, but starting from a negative "remaining" feels
cryptic, compared to starting from a zero "remaining".

>  	struct ref_update **updates = transaction->updates;
>  	struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
>  	struct string_list_item *ref_to_delete;
> @@ -3762,6 +3770,11 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>  				    update->refname);
>  			goto cleanup;
>  		}
> +		if (remaining_fds > 0) {
> +			remaining_fds--;
> +		} else {
> +			close_lock_file(update->lock->lk);
> +		}

Any plan to add more code to these blocks in future updates?

Thanks.

> diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
> index 7a69f1a..636d3a1 100755
> --- a/t/t1400-update-ref.sh
> +++ b/t/t1400-update-ref.sh
> @@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
>  
>  test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
>  
> -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
> +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
>  (
>  	for i in $(test_seq 33)
>  	do
> @@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
>  )
>  '
>  
> -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
> +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
>  (
>  	for i in $(test_seq 33)
>  	do

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-20 23:07             ` Stefan Beller
  2015-04-21  0:31               ` Stefan Beller
@ 2015-04-21 17:19               ` Junio C Hamano
  2015-04-21 17:31                 ` Stefan Beller
  1 sibling, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2015-04-21 17:19 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Michael Haggerty, git

Stefan Beller <sbeller@google.com> writes:

> On Mon, Apr 20, 2015 at 3:51 PM, Junio C Hamano <gitster@pobox.com> wrote:
>
>> On the core management side, xmalloc() and friends retry upon
>> failure, after attempting to free the resource.  I wonder if your
>> codepath can do something similar to that, perhaps?
>
> But then we'd need to think about which fds can be 'garbage collected'.
> The lock files certainly can be closed and reopened.

... and that is the natural thing to garbage collect, no?  After
all, this approach allows lock-file subsystem to keep fds open even
when they do not absolutely have to, so they are the best candidates
to be shot down first when things gets tight.

A good thing is that you have a list of all them already for use by
the atexit handler ;-)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] refs.c: enable large transactions
  2015-04-21  0:35                 ` [PATCH] refs.c: enable " Stefan Beller
  2015-04-21 17:16                   ` Junio C Hamano
@ 2015-04-21 17:22                   ` Junio C Hamano
  1 sibling, 0 replies; 28+ messages in thread
From: Junio C Hamano @ 2015-04-21 17:22 UTC (permalink / raw)
  To: Stefan Beller; +Cc: mhagger, git

Stefan Beller <sbeller@google.com> writes:

> This replaces the latest patch on origin/sb/remove-fd-from-ref-lock

Thanks, will replace.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] refs.c: enable large transactions
  2015-04-21 17:16                   ` Junio C Hamano
@ 2015-04-21 17:24                     ` Stefan Beller
  2015-04-21 18:00                       ` Junio C Hamano
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-21 17:24 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Tue, Apr 21, 2015 at 10:16 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> +     /*
>> +      * We may want to open many files in a large transaction, so come up with
>> +      * a reasonable maximum, keep some spares for stdin/out and other open
>> +      * files.
>> +      */
>> +     int remaining_fds = get_max_fd_limit() - 32;
>
> Can this go negative?  If it does so, does it matter?  I think the

Yes it can go negative. It doesn't matter as we'd then run into the
"close_lock_file(update->lock->lk);" case below.

I thought about putting a cap on it to not let it go negative in the first
place, but I did not find an easily accessible max() function, as I'd like
to write it as

    int remaining_fds = max(get_max_fd_limit() - 32, 0);

to have it in one line. The alternative of

    int remaining_fds = get_max_fd_limit() - 32;
    ...
    if (remaining_fds < 0)
        remaining_fds = 0

seemed to cuttered to me. Yet another alternative

     int remaining_fds = get_max_fd_limit() - 32 < 0 ? 0 :
get_max_fd_limit() - 32;

is also not appealing as we'd have to call get_max_fd_limit 2 times.
That's why I thought going negative is not as bad, but have readable
code instead.

As you think about the possibility of remaining_fds being negative,
this might not
be the best idea though


> code doesn't barf, but starting from a negative "remaining" feels
> cryptic, compared to starting from a zero "remaining".
>
>>       struct ref_update **updates = transaction->updates;
>>       struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
>>       struct string_list_item *ref_to_delete;
>> @@ -3762,6 +3770,11 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>>                                   update->refname);
>>                       goto cleanup;
>>               }
>> +             if (remaining_fds > 0) {
>> +                     remaining_fds--;
>> +             } else {
>> +                     close_lock_file(update->lock->lk);
>> +             }
>
> Any plan to add more code to these blocks in future updates?

I'll remove the braces. :(

>
> Thanks.
>
>> diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
>> index 7a69f1a..636d3a1 100755
>> --- a/t/t1400-update-ref.sh
>> +++ b/t/t1400-update-ref.sh
>> @@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
>>
>>  test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
>>
>> -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
>> +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
>>  (
>>       for i in $(test_seq 33)
>>       do
>> @@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
>>  )
>>  '
>>
>> -test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
>> +test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
>>  (
>>       for i in $(test_seq 33)
>>       do

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-21 17:19               ` Junio C Hamano
@ 2015-04-21 17:31                 ` Stefan Beller
  0 siblings, 0 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-21 17:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Michael Haggerty, git

On Tue, Apr 21, 2015 at 10:19 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> On Mon, Apr 20, 2015 at 3:51 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>
>>> On the core management side, xmalloc() and friends retry upon
>>> failure, after attempting to free the resource.  I wonder if your
>>> codepath can do something similar to that, perhaps?
>>
>> But then we'd need to think about which fds can be 'garbage collected'.
>> The lock files certainly can be closed and reopened.
>
> ... and that is the natural thing to garbage collect, no?  After
> all, this approach allows lock-file subsystem to keep fds open even
> when they do not absolutely have to, so they are the best candidates
> to be shot down first when things gets tight.
>
> A good thing is that you have a list of all them already for use by
> the atexit handler ;-)

Yes, but when such a garbage collection is in place, it is part of the API,
which means you need to check all places, where lock files are used,
so that you have the pattern

   open lock file
   ...
   if lock_file->lk == -1
       lock_file_reopen(...)
   use(lock_file->lk)

I think that could be easily integrated into the lock_file API when the API
users don't directly read the values of the lock file struct, but rather call
a method
    int lock_file_get_fd(lock);

which guarantees to return a valid fd, but that is not long term stable. You
can use the fd for the next action, but it may become garbage collected again.

So then you don't know how long the fd is valid as the garbage collection
may be either triggered from the lock file code or by yourself?

There is some uncertainty on when which data is valid, which is why I dislike
this approach and rather prefer to make it explicit. If we run into such
a problem in another place, we can still think about a general solution in the
lock file API.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] refs.c: enable large transactions
  2015-04-21 17:24                     ` Stefan Beller
@ 2015-04-21 18:00                       ` Junio C Hamano
  2015-04-21 19:06                         ` [PATCHv2] " Stefan Beller
  0 siblings, 1 reply; 28+ messages in thread
From: Junio C Hamano @ 2015-04-21 18:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Michael Haggerty, git

Stefan Beller <sbeller@google.com> writes:

> I thought about putting a cap on it to not let it go negative in the first
> place, but I did not find an easily accessible max() function, as I'd like
> to write it as
>
>     int remaining_fds = max(get_max_fd_limit() - 32, 0);
>
> to have it in one line. The alternative of
>
>     int remaining_fds = get_max_fd_limit() - 32;
>     ...
>     if (remaining_fds < 0)
>         remaining_fds = 0
>
> seemed to cuttered to me.

Just to set the standard yardstick straight, either is fine, but I
would say the latter is slightly preferrable from readability's
point of view.  Rows of dense single lines get tiring to read pretty
quickly.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCHv2] refs.c: enable large transactions
  2015-04-21 18:00                       ` Junio C Hamano
@ 2015-04-21 19:06                         ` Stefan Beller
  2015-04-21 19:56                           ` Stefan Beller
  2015-04-22 14:11                           ` Michael Haggerty
  0 siblings, 2 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-21 19:06 UTC (permalink / raw)
  To: gitster, mhagger, git; +Cc: Stefan Beller

This is another attempt on enabling large transactions
(large in terms of open file descriptors). We keep track of how many
lock files are opened by the ref_transaction_commit function.
When more than a reasonable amount of files is open, we close
the file descriptors to make sure the transaction can continue.

Another idea I had during implementing this was to move this file
closing into the lock file API, such that only a certain amount of
lock files can be open at any given point in time and we'd be 'garbage
collecting' open fds when necessary in any relevant call to the lock
file API. This would have brought the advantage of having such
functionality available in other users of the lock file API as well.
The downside however is the over complication, you really need to always
check for (lock->fd != -1) all the time, which may slow down other parts
of the code, which did not ask for such a feature.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

* Removed unneeded braces in the condition to check if we want to close
  the lock file.
* made the counter for the remaining fds an unsigned int. That is what   
  get_max_fd_limit() returns, so there are no concerns for an overflow.
  Also it cannot go below 0 any more.
* moved the initialisation of the remaining_fds a bit down and added a comment  
  
 refs.c                | 21 +++++++++++++++++++++
 t/t1400-update-ref.sh |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)
 
 

diff --git a/refs.c b/refs.c
index 4f495bd..34cfcdf 100644
--- a/refs.c
+++ b/refs.c
@@ -3041,6 +3041,8 @@ static int write_ref_sha1(struct ref_lock *lock,
 		errno = EINVAL;
 		return -1;
 	}
+	if (lock->lk->fd == -1)
+		reopen_lock_file(lock->lk);
 	if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
 	    write_in_full(lock->lk->fd, &term, 1) != 1 ||
 	    close_ref(lock) < 0) {
@@ -3718,6 +3720,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 			   struct strbuf *err)
 {
 	int ret = 0, i;
+	unsigned int remaining_fds;
 	int n = transaction->nr;
 	struct ref_update **updates = transaction->updates;
 	struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
@@ -3733,6 +3736,20 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 		return 0;
 	}
 
+	/*
+	 * We need to open many files in a large transaction, so come up with
+	 * a reasonable maximum. We still keep some spares for stdin/out and
+	 * other open files. Experiments determined we need more fds when
+	 * running inside our test suite than directly in the shell. It's
+	 * unclear where these fds come from. 32 should be a reasonable large
+	 * number though.
+	 */
+	remaining_fds = get_max_fd_limit();
+	if (remaining_fds > 32)
+		remaining_fds -= 32;
+	else
+		remaining_fds = 0;
+
 	/* Copy, sort, and reject duplicate refs */
 	qsort(updates, n, sizeof(*updates), ref_update_compare);
 	if (ref_update_reject_duplicates(updates, n, err)) {
@@ -3762,6 +3779,10 @@ int ref_transaction_commit(struct ref_transaction *transaction,
 				    update->refname);
 			goto cleanup;
 		}
+		if (remaining_fds > 0)
+			remaining_fds--;
+		else
+			close_lock_file(update->lock->lk);
 	}
 
 	/* Perform updates first so live commits remain referenced */
diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh
index 7a69f1a..636d3a1 100755
--- a/t/t1400-update-ref.sh
+++ b/t/t1400-update-ref.sh
@@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
 
 test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
@@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
 )
 '
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
 (
 	for i in $(test_seq 33)
 	do
-- 
2.4.0.rc2.5.g4c2045b.dirty

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCHv2] refs.c: enable large transactions
  2015-04-21 19:06                         ` [PATCHv2] " Stefan Beller
@ 2015-04-21 19:56                           ` Stefan Beller
  2015-04-22 14:11                           ` Michael Haggerty
  1 sibling, 0 replies; 28+ messages in thread
From: Stefan Beller @ 2015-04-21 19:56 UTC (permalink / raw)
  To: Junio C Hamano, Michael Haggerty, git; +Cc: Stefan Beller

>
> * Removed unneeded braces in the condition to check if we want to close
>   the lock file.
> * made the counter for the remaining fds an unsigned int. That is what
>   get_max_fd_limit() returns, so there are no concerns for an overflow.
>   Also it cannot go below 0 any more.
> * moved the initialisation of the remaining_fds a bit down and added a comment

* Once again this replaces the last patch on top of
origin/sb/remove-fd-from-ref-lock

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-21  0:31               ` Stefan Beller
  2015-04-21  0:35                 ` [PATCH] refs.c: enable " Stefan Beller
@ 2015-04-21 23:21                 ` Jeff King
  2015-04-22 19:14                   ` Stefan Beller
  1 sibling, 1 reply; 28+ messages in thread
From: Jeff King @ 2015-04-21 23:21 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, Michael Haggerty, git

On Mon, Apr 20, 2015 at 05:31:11PM -0700, Stefan Beller wrote:

> When running the test locally, i.e. not in the test suite, but typing
> the commands
> myself into the shell, Git is fine with having just 5 file descriptors left.
> The additional 4 required fds come from beign run inside the test suite.
> 
> When strace-ing git, I cannot see any possible other fds which would require
> having some left over space required. So I'd propose we'd just take a reasonable
> number not too small for various test setups like 32 and then go with the
> proposed patches.

FWIW, we already use a magic value of "25 extra" in open_packed_git_1. I
don't know if that means the number has been proven in practice, or if
it is simply that nobody actually exercises the pack_max_fds code. I
suspect it is the latter, especially since d131b7a (sha1_file.c: Don't
retain open fds on small packs, 2011-03-02).

-Peff

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCHv2] refs.c: enable large transactions
  2015-04-21 19:06                         ` [PATCHv2] " Stefan Beller
  2015-04-21 19:56                           ` Stefan Beller
@ 2015-04-22 14:11                           ` Michael Haggerty
  2015-04-22 19:09                             ` Stefan Beller
  1 sibling, 1 reply; 28+ messages in thread
From: Michael Haggerty @ 2015-04-22 14:11 UTC (permalink / raw)
  To: Stefan Beller, gitster, git

On 04/21/2015 09:06 PM, Stefan Beller wrote:
> This is another attempt on enabling large transactions
> (large in terms of open file descriptors). We keep track of how many
> lock files are opened by the ref_transaction_commit function.
> When more than a reasonable amount of files is open, we close
> the file descriptors to make sure the transaction can continue.
> 
> Another idea I had during implementing this was to move this file
> closing into the lock file API, such that only a certain amount of
> lock files can be open at any given point in time and we'd be 'garbage
> collecting' open fds when necessary in any relevant call to the lock
> file API. This would have brought the advantage of having such
> functionality available in other users of the lock file API as well.
> The downside however is the over complication, you really need to always
> check for (lock->fd != -1) all the time, which may slow down other parts
> of the code, which did not ask for such a feature.

Aside from a missing error check (see below), this looks good to me.

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
> 
> * Removed unneeded braces in the condition to check if we want to close
>   the lock file.
> * made the counter for the remaining fds an unsigned int. That is what   
>   get_max_fd_limit() returns, so there are no concerns for an overflow.
>   Also it cannot go below 0 any more.
> * moved the initialisation of the remaining_fds a bit down and added a comment  
>   
>  refs.c                | 21 +++++++++++++++++++++
>  t/t1400-update-ref.sh |  4 ++--
>  2 files changed, 23 insertions(+), 2 deletions(-)
>  
>  
> 
> diff --git a/refs.c b/refs.c
> index 4f495bd..34cfcdf 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -3041,6 +3041,8 @@ static int write_ref_sha1(struct ref_lock *lock,
>  		errno = EINVAL;
>  		return -1;
>  	}
> +	if (lock->lk->fd == -1)
> +		reopen_lock_file(lock->lk);

You should check that reopen_lock_file() was successful.

>  	if (write_in_full(lock->lk->fd, sha1_to_hex(sha1), 40) != 40 ||
>  	    write_in_full(lock->lk->fd, &term, 1) != 1 ||
>  	    close_ref(lock) < 0) {
> @@ -3718,6 +3720,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>  			   struct strbuf *err)
>  {
>  	int ret = 0, i;
> +	unsigned int remaining_fds;
>  	int n = transaction->nr;
>  	struct ref_update **updates = transaction->updates;
>  	struct string_list refs_to_delete = STRING_LIST_INIT_NODUP;
> @@ -3733,6 +3736,20 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>  		return 0;
>  	}
>  
> +	/*
> +	 * We need to open many files in a large transaction, so come up with
> +	 * a reasonable maximum. We still keep some spares for stdin/out and
> +	 * other open files. Experiments determined we need more fds when
> +	 * running inside our test suite than directly in the shell. It's
> +	 * unclear where these fds come from. 32 should be a reasonable large
> +	 * number though.
> +	 */
> +	remaining_fds = get_max_fd_limit();
> +	if (remaining_fds > 32)
> +		remaining_fds -= 32;
> +	else
> +		remaining_fds = 0;
> +
>  	/* Copy, sort, and reject duplicate refs */
>  	qsort(updates, n, sizeof(*updates), ref_update_compare);
>  	if (ref_update_reject_duplicates(updates, n, err)) {
> @@ -3762,6 +3779,10 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>  				    update->refname);
>  			goto cleanup;
>  		}
> +		if (remaining_fds > 0)
> +			remaining_fds--;
> +		else
> +			close_lock_file(update->lock->lk);

I consider this code a stopgap, and simplicity is more important than
optimization. But just for the sake of discussion, if we planned to keep
this code around, it could be improved by not wasting open file
descriptors for references that are only being verified or deleted, like so:

                if (!(flags & REF_HAVE_NEW) ||
                    is_null_sha1(update->new_sha1) ||
                    remaining_fds == 0)
                        close_lock_file(update->lock->lk);
                else
                        remaining_fds--;

>  	}
>  
>  	/* Perform updates first so live commits remain referenced */
> [...]

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCHv2] refs.c: enable large transactions
  2015-04-22 14:11                           ` Michael Haggerty
@ 2015-04-22 19:09                             ` Stefan Beller
  2015-04-22 20:12                               ` Michael Haggerty
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-22 19:09 UTC (permalink / raw)
  To: Michael Haggerty; +Cc: Junio C Hamano, git

On Wed, Apr 22, 2015 at 7:11 AM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
>> +     if (lock->lk->fd == -1)
>> +             reopen_lock_file(lock->lk);
>
> You should check that reopen_lock_file() was successful.

ok


>> @@ -3762,6 +3779,10 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>>                                   update->refname);
>>                       goto cleanup;
>>               }
>> +             if (remaining_fds > 0)
>> +                     remaining_fds--;
>> +             else
>> +                     close_lock_file(update->lock->lk);
>
> I consider this code a stopgap, and simplicity is more important than
> optimization.

Can you explain a bit why you think this is a stopgap?

Looking at the patch this looks simple to me, as there are no
huge pain points involved. (Compared to [1] as we'd change a lot in
that series)

Also this is pretty good on performance.
The small cases do not have an additional unneeded close and reopen,
but only the
larger cases do.

[1] http://git.661346.n2.nabble.com/PATCHv1-0-6-Fix-bug-in-large-transactions-tt7624363.html#a7624368




> But just for the sake of discussion, if we planned to keep
> this code around, it could be improved by not wasting open file
> descriptors for references that are only being verified or deleted, like so:

I'll pick that up for the resend.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-21 23:21                 ` [PATCH 0/3] Another approach to " Jeff King
@ 2015-04-22 19:14                   ` Stefan Beller
  2015-04-22 20:11                     ` Jeff King
  0 siblings, 1 reply; 28+ messages in thread
From: Stefan Beller @ 2015-04-22 19:14 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Michael Haggerty, git

On Tue, Apr 21, 2015 at 4:21 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 20, 2015 at 05:31:11PM -0700, Stefan Beller wrote:
>
>> When running the test locally, i.e. not in the test suite, but typing
>> the commands
>> myself into the shell, Git is fine with having just 5 file descriptors left.
>> The additional 4 required fds come from beign run inside the test suite.
>>
>> When strace-ing git, I cannot see any possible other fds which would require
>> having some left over space required. So I'd propose we'd just take a reasonable
>> number not too small for various test setups like 32 and then go with the
>> proposed patches.
>
> FWIW, we already use a magic value of "25 extra" in open_packed_git_1. I
> don't know if that means the number has been proven in practice, or if
> it is simply that nobody actually exercises the pack_max_fds code. I
> suspect it is the latter, especially since d131b7a (sha1_file.c: Don't
> retain open fds on small packs, 2011-03-02).

25 is equally sound as I could not find any hard calculation on that
number in the
history or code. I will change it to 25 in the next version of the patch.

Thanks!
Stefan

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/3] Another approach to large transactions
  2015-04-22 19:14                   ` Stefan Beller
@ 2015-04-22 20:11                     ` Jeff King
  0 siblings, 0 replies; 28+ messages in thread
From: Jeff King @ 2015-04-22 20:11 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, Michael Haggerty, git

On Wed, Apr 22, 2015 at 12:14:08PM -0700, Stefan Beller wrote:

> > FWIW, we already use a magic value of "25 extra" in open_packed_git_1. I
> > don't know if that means the number has been proven in practice, or if
> > it is simply that nobody actually exercises the pack_max_fds code. I
> > suspect it is the latter, especially since d131b7a (sha1_file.c: Don't
> > retain open fds on small packs, 2011-03-02).
> 
> 25 is equally sound as I could not find any hard calculation on that
> number in the
> history or code. I will change it to 25 in the next version of the patch.

FWIW, I think 32 is just fine, too, and the patch doesn't need re-rolled
because of this. I mostly wanted to point out that yes, indeed, we use
this "eh, a few dozen is probably enough" strategy elsewhere. Which
maybe, sort-of validates it. :)

-Peff

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCHv2] refs.c: enable large transactions
  2015-04-22 19:09                             ` Stefan Beller
@ 2015-04-22 20:12                               ` Michael Haggerty
  0 siblings, 0 replies; 28+ messages in thread
From: Michael Haggerty @ 2015-04-22 20:12 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, git

On 04/22/2015 09:09 PM, Stefan Beller wrote:
> On Wed, Apr 22, 2015 at 7:11 AM, Michael Haggerty <mhagger@alum.mit.edu> wrote:
>>> +     if (lock->lk->fd == -1)
>>> +             reopen_lock_file(lock->lk);
>>
>> You should check that reopen_lock_file() was successful.
> 
> ok
> 
> 
>>> @@ -3762,6 +3779,10 @@ int ref_transaction_commit(struct ref_transaction *transaction,
>>>                                   update->refname);
>>>                       goto cleanup;
>>>               }
>>> +             if (remaining_fds > 0)
>>> +                     remaining_fds--;
>>> +             else
>>> +                     close_lock_file(update->lock->lk);
>>
>> I consider this code a stopgap, and simplicity is more important than
>> optimization.
> 
> Can you explain a bit why you think this is a stopgap?

At the point the lockfile is created, we have all the information we
need to write the new SHA-1 to it and close it immediately. It seems
more straightforward to do it that way than the way it is done in the
current code, where the locking and writing are separated in time and
space and now there is the small extra complication of
maybe-closing-maybe-not. But getting to the final destination requires
more refactoring than would be prudent for the upcoming release.

In other words, I think your fix is OK but that the whole area of code
has still not reached its final form. I am working on a patch series
that does what I have in mind, but it's not ready yet. As I remember I
got stuck when I realized that the reflog for HEAD is updated somewhere
out of the blue without proper locking and I haven't gotten around to
sorting it out yet.

> [...]
>> But just for the sake of discussion, if we planned to keep
>> this code around, it could be improved by not wasting open file
>> descriptors for references that are only being verified or deleted, like so:
> 
> I'll pick that up for the resend.

OK.

Michael

-- 
Michael Haggerty
mhagger@alum.mit.edu

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2015-04-22 20:12 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-16 23:17 [PATCH 0/3] Another approach to large transactions Stefan Beller
2015-04-16 23:17 ` [PATCH 1/3] refs.c: remove lock_fd from struct ref_lock Stefan Beller
2015-04-16 23:17 ` [PATCH 2/3] Move get_max_fd_limit(void) to git_compat_util.h Stefan Beller
2015-04-16 23:17 ` [PATCH 3/3] refs.c: enable large transactions Stefan Beller
2015-04-17 17:09 ` [PATCH 0/3] Another approach to " Junio C Hamano
2015-04-17 22:12   ` Junio C Hamano
2015-04-17 22:17     ` Stefan Beller
2015-04-17 23:31       ` Stefan Beller
2015-04-20 22:26         ` Stefan Beller
2015-04-20 22:51           ` Junio C Hamano
2015-04-20 23:07             ` Stefan Beller
2015-04-21  0:31               ` Stefan Beller
2015-04-21  0:35                 ` [PATCH] refs.c: enable " Stefan Beller
2015-04-21 17:16                   ` Junio C Hamano
2015-04-21 17:24                     ` Stefan Beller
2015-04-21 18:00                       ` Junio C Hamano
2015-04-21 19:06                         ` [PATCHv2] " Stefan Beller
2015-04-21 19:56                           ` Stefan Beller
2015-04-22 14:11                           ` Michael Haggerty
2015-04-22 19:09                             ` Stefan Beller
2015-04-22 20:12                               ` Michael Haggerty
2015-04-21 17:22                   ` [PATCH] " Junio C Hamano
2015-04-21 23:21                 ` [PATCH 0/3] Another approach to " Jeff King
2015-04-22 19:14                   ` Stefan Beller
2015-04-22 20:11                     ` Jeff King
2015-04-21 17:19               ` Junio C Hamano
2015-04-21 17:31                 ` Stefan Beller
2015-04-21 12:37             ` Michael Haggerty

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.