All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 0/2] Use CLOEXEC to avoid fd leaks
@ 2016-09-05 21:11 larsxschneider
  2016-09-05 21:11 ` [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC larsxschneider
  2016-09-05 21:11 ` [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes larsxschneider
  0 siblings, 2 replies; 15+ messages in thread
From: larsxschneider @ 2016-09-05 21:11 UTC (permalink / raw)
  To: git; +Cc: gitster, tboegi, Johannes.Schindelin, e, Lars Schneider

From: Lars Schneider <larsxschneider@gmail.com>

Use the CLOEXEC flag similar to 05d1ed61 to fix leaked file descriptors.

Patch 1/2 was contemplated by Junio here:
http://public-inbox.org/git/xmqq37lnbbpk.fsf@gitster.mtv.corp.google.com/

I followed Torsten's advice and renamed `git_open_noatime` to
`git_open_noatime_cloexec`:
http://public-inbox.org/git/20160830145429.GA11221@tb-raspi/

Patch 2/2 was part of my "Git filter protocol" series:
http://public-inbox.org/git/20160825110752.31581-14-larsxschneider@gmail.com/
(I kept the patch as-is but changed the commit messages slightly)

Thanks,
Lars

Lars Schneider (2):
  sha1_file: open window into packfiles with CLOEXEC
  read-cache: make sure file handles are not inherited by child
    processes

 builtin/pack-objects.c |  2 +-
 cache.h                |  2 +-
 pack-bitmap.c          |  2 +-
 read-cache.c           |  2 +-
 sha1_file.c            | 14 +++++++-------
 5 files changed, 11 insertions(+), 11 deletions(-)

--
2.10.0


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-05 21:11 [PATCH v1 0/2] Use CLOEXEC to avoid fd leaks larsxschneider
@ 2016-09-05 21:11 ` larsxschneider
  2016-09-05 22:27   ` Eric Wong
  2016-09-05 21:11 ` [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes larsxschneider
  1 sibling, 1 reply; 15+ messages in thread
From: larsxschneider @ 2016-09-05 21:11 UTC (permalink / raw)
  To: git; +Cc: gitster, tboegi, Johannes.Schindelin, e, Lars Schneider

From: Lars Schneider <larsxschneider@gmail.com>

All processes that the Git main process spawns inherit the open file
descriptors of the main process. These leaked file descriptors can
cause problems.

Use the CLOEXEC flag similar to 05d1ed61 to fix the leaked file
descriptors.

Signed-off-by: Lars Schneider <larsxschneider@gmail.com>
---
 builtin/pack-objects.c |  2 +-
 cache.h                |  2 +-
 pack-bitmap.c          |  2 +-
 sha1_file.c            | 14 +++++++-------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4a63398..a2b1fb6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -718,7 +718,7 @@ static off_t write_reused_pack(struct sha1file *f)
 	if (!is_pack_valid(reuse_packfile))
 		die("packfile is invalid: %s", reuse_packfile->pack_name);
 
-	fd = git_open_noatime(reuse_packfile->pack_name);
+	fd = git_open_noatime_cloexec(reuse_packfile->pack_name);
 	if (fd < 0)
 		die_errno("unable to open packfile for reuse: %s",
 			  reuse_packfile->pack_name);
diff --git a/cache.h b/cache.h
index b780a91..ae79747 100644
--- a/cache.h
+++ b/cache.h
@@ -1089,7 +1089,7 @@ extern int write_sha1_file(const void *buf, unsigned long len, const char *type,
 extern int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, unsigned char *sha1, unsigned flags);
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
-extern int git_open_noatime(const char *name);
+extern int git_open_noatime_cloexec(const char *name);
 extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
 extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index b949e51..1b39e5d 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -266,7 +266,7 @@ static int open_pack_bitmap_1(struct packed_git *packfile)
 		return -1;
 
 	idx_name = pack_bitmap_filename(packfile);
-	fd = git_open_noatime(idx_name);
+	fd = git_open_noatime_cloexec(idx_name);
 	free(idx_name);
 
 	if (fd < 0)
diff --git a/sha1_file.c b/sha1_file.c
index 3045aea..c1701dc 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -356,7 +356,7 @@ void read_info_alternates(const char * relative_base, int depth)
 	int fd;
 
 	path = xstrfmt("%s/info/alternates", relative_base);
-	fd = git_open_noatime(path);
+	fd = git_open_noatime_cloexec(path);
 	free(path);
 	if (fd < 0)
 		return;
@@ -550,7 +550,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
 	struct pack_idx_header *hdr;
 	size_t idx_size;
 	uint32_t version, nr, i, *index;
-	int fd = git_open_noatime(path);
+	int fd = git_open_noatime_cloexec(path);
 	struct stat st;
 
 	if (fd < 0)
@@ -956,7 +956,7 @@ static int open_packed_git_1(struct packed_git *p)
 	while (pack_max_fds <= pack_open_fds && close_one_pack())
 		; /* nothing */
 
-	p->pack_fd = git_open_noatime(p->pack_name);
+	p->pack_fd = git_open_noatime_cloexec(p->pack_name);
 	if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
 		return -1;
 	pack_open_fds++;
@@ -1459,9 +1459,9 @@ int check_sha1_signature(const unsigned char *sha1, void *map,
 	return hashcmp(sha1, real_sha1) ? -1 : 0;
 }
 
-int git_open_noatime(const char *name)
+int git_open_noatime_cloexec(const char *name)
 {
-	static int sha1_file_open_flag = O_NOATIME;
+	static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
 
 	for (;;) {
 		int fd;
@@ -1505,7 +1505,7 @@ static int open_sha1_file(const unsigned char *sha1)
 	struct alternate_object_database *alt;
 	int most_interesting_errno;
 
-	fd = git_open_noatime(sha1_file_name(sha1));
+	fd = git_open_noatime_cloexec(sha1_file_name(sha1));
 	if (fd >= 0)
 		return fd;
 	most_interesting_errno = errno;
@@ -1513,7 +1513,7 @@ static int open_sha1_file(const unsigned char *sha1)
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
 		fill_sha1_path(alt->name, sha1);
-		fd = git_open_noatime(alt->base);
+		fd = git_open_noatime_cloexec(alt->base);
 		if (fd >= 0)
 			return fd;
 		if (most_interesting_errno == ENOENT)
-- 
2.10.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-05 21:11 [PATCH v1 0/2] Use CLOEXEC to avoid fd leaks larsxschneider
  2016-09-05 21:11 ` [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC larsxschneider
@ 2016-09-05 21:11 ` larsxschneider
  2016-09-06 11:41   ` Johannes Schindelin
  2016-09-06 21:06   ` Eric Wong
  1 sibling, 2 replies; 15+ messages in thread
From: larsxschneider @ 2016-09-05 21:11 UTC (permalink / raw)
  To: git; +Cc: gitster, tboegi, Johannes.Schindelin, e, Lars Schneider

From: Lars Schneider <larsxschneider@gmail.com>

This fix prepares a series with the goal to avoid launching a new
clean/smudge filter process for each file that is filtered. A new
long running filter process is introduced that is used to filter all
files in a single Git invocation.

Consider the case of a file that requires filtering and is present in
branch A but not in branch B. If A is the current HEAD and we checkout B
then the following happens:

1. ce_compare_data() opens the file
2.   index_fd() detects that the file requires to run a clean filter and
     calls index_stream_convert_blob()
4.     index_stream_convert_blob() calls convert_to_git_filter_fd()
5.       convert_to_git_filter_fd() calls apply_filter() which creates a
         new long running filter process (in case it is the first file
         of this kind to be filtered)
6.       The new filter process inherits all file handles. This is the
         default on Linux/OSX and is explicitly defined in the
         `CreateProcessW` call in `mingw.c` on Windows.
7. ce_compare_data() closes the file
8. Git unlinks the file as it is not present in B

The unlink operation does not work on Windows because the filter process
has still an open handle to the file. On Linux/OSX the unlink operation
succeeds but the file descriptors still leak into the child process.

Fix this problem by opening files in read-cache with the CLOEXEC flag to
ensure that the file descriptor does not remain open in a newly spawned
process similar to 05d1ed61.

Signed-off-by: Lars Schneider <larsxschneider@gmail.com>
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 491e52d..02f74d3 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -156,7 +156,7 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
 static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
 {
 	int match = -1;
-	int fd = open(ce->name, O_RDONLY);
+	int fd = open(ce->name, O_RDONLY | O_CLOEXEC);
 
 	if (fd >= 0) {
 		unsigned char sha1[20];
-- 
2.10.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-05 21:11 ` [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC larsxschneider
@ 2016-09-05 22:27   ` Eric Wong
  2016-09-06  9:36     ` Jakub Narębski
  2016-09-06 11:38     ` Johannes Schindelin
  0 siblings, 2 replies; 15+ messages in thread
From: Eric Wong @ 2016-09-05 22:27 UTC (permalink / raw)
  To: larsxschneider; +Cc: git, gitster, tboegi, Johannes.Schindelin

larsxschneider@gmail.com wrote:
> All processes that the Git main process spawns inherit the open file
> descriptors of the main process. These leaked file descriptors can
> cause problems.


> -int git_open_noatime(const char *name)
> +int git_open_noatime_cloexec(const char *name)
>  {
> -	static int sha1_file_open_flag = O_NOATIME;
> +	static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
>  
>  	for (;;) {
>  		int fd;

If there's real problems being caused by lack of cloexec
today, I think the F_SETFD fallback I proposed in
https://public-inbox.org/git/20160818173555.GA29253@starla/
will be necessary.

I question the need for the "_cloexec" suffixing in the
function name since the old function is going away entirely.

I prefer all FD-creating functions set cloexec by default
for FD > 2 to avoid inadvertantly leaking FDs.  So we
ought to use pipe2, accept4, socket(..., SOCK_CLOEXEC), etc...
and fallback to the racy+slower F_SETFD when not available.


Fwiw, Perl has been setting cloexec on FDs above $^F
(2, $SYSTEM_FD_MAX) for decades, and Ruby started
doing it a few years ago, too.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-05 22:27   ` Eric Wong
@ 2016-09-06  9:36     ` Jakub Narębski
  2016-09-06 11:38     ` Johannes Schindelin
  1 sibling, 0 replies; 15+ messages in thread
From: Jakub Narębski @ 2016-09-06  9:36 UTC (permalink / raw)
  To: Eric Wong, larsxschneider; +Cc: git, gitster, tboegi, Johannes.Schindelin

W dniu 06.09.2016 o 00:27, Eric Wong pisze:
> larsxschneider@gmail.com wrote:

>> -int git_open_noatime(const char *name)
>> +int git_open_noatime_cloexec(const char *name)
[...]
> 
> I question the need for the "_cloexec" suffixing in the
> function name since the old function is going away entirely.

On the other hand the new name is descriptive...

> 
> I prefer all FD-creating functions set cloexec by default
> for FD > 2 to avoid inadvertantly leaking FDs.  So we
> ought to use pipe2, accept4, socket(..., SOCK_CLOEXEC), etc...
> and fallback to the racy+slower F_SETFD when not available.
> 
> 
> Fwiw, Perl has been setting cloexec on FDs above $^F
> (2, $SYSTEM_FD_MAX) for decades, and Ruby started
> doing it a few years ago, too.
 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-05 22:27   ` Eric Wong
  2016-09-06  9:36     ` Jakub Narębski
@ 2016-09-06 11:38     ` Johannes Schindelin
  2016-09-07 13:20       ` Lars Schneider
  1 sibling, 1 reply; 15+ messages in thread
From: Johannes Schindelin @ 2016-09-06 11:38 UTC (permalink / raw)
  To: Eric Wong; +Cc: larsxschneider, git, gitster, tboegi

Hi Eric & Lars,

On Mon, 5 Sep 2016, Eric Wong wrote:

> larsxschneider@gmail.com wrote:
> > All processes that the Git main process spawns inherit the open file
> > descriptors of the main process. These leaked file descriptors can
> > cause problems.
> 
> 
> > -int git_open_noatime(const char *name)
> > +int git_open_noatime_cloexec(const char *name)
> >  {
> > -	static int sha1_file_open_flag = O_NOATIME;
> > +	static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
> >  
> >  	for (;;) {
> >  		int fd;
> 
> If there's real problems being caused by lack of cloexec
> today, I think the F_SETFD fallback I proposed in
> https://public-inbox.org/git/20160818173555.GA29253@starla/
> will be necessary.

Yes, it is good to have that patch available to go if we need it. I do not
think that we will need it, though, as the biggest problems that are
solved through the CLOEXEC flag are ones caused on Windows, when files
cannot be deleted or renamed because there are still (uselessly) open
handles referencing them.

> I question the need for the "_cloexec" suffixing in the
> function name since the old function is going away entirely.

Me, too. While it is correct, it makes things harder to read, so it may
even cause more harm than it does good.

> I prefer all FD-creating functions set cloexec by default
> for FD > 2 to avoid inadvertantly leaking FDs.  So we
> ought to use pipe2, accept4, socket(..., SOCK_CLOEXEC), etc...
> and fallback to the racy+slower F_SETFD when not available.

In the original Pull Request where the change was contributed to Git for
Windows, this was tested (actually, the code did not see whether fd > 2,
but simply assumed that all newly opened file descriptors would be > 2
anyway), and it failed:

https://github.com/git-for-windows/git/pull/755#issuecomment-220247972

So it appears that we would have to exclude at least the code path to `git
upload-pack` from that magic.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-05 21:11 ` [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes larsxschneider
@ 2016-09-06 11:41   ` Johannes Schindelin
  2016-09-06 21:06   ` Eric Wong
  1 sibling, 0 replies; 15+ messages in thread
From: Johannes Schindelin @ 2016-09-06 11:41 UTC (permalink / raw)
  To: Lars Schneider; +Cc: git, gitster, tboegi, e

Hi Lars,

On Mon, 5 Sep 2016, larsxschneider@gmail.com wrote:

> [... commit message ...]

Makes sense.

> diff --git a/read-cache.c b/read-cache.c
> index 491e52d..02f74d3 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -156,7 +156,7 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
>  static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
>  {
>  	int match = -1;
> -	int fd = open(ce->name, O_RDONLY);
> +	int fd = open(ce->name, O_RDONLY | O_CLOEXEC);

Eric's comment on 1/2 applies here, too, of course: should this cause any
problems on non-Windows platforms, we always have that FD_CLOEXEC thing
that we could probably use to fix it.

But let's cross that bridge when (or better: if) we get there.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-05 21:11 ` [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes larsxschneider
  2016-09-06 11:41   ` Johannes Schindelin
@ 2016-09-06 21:06   ` Eric Wong
  2016-09-07 13:39     ` Lars Schneider
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Wong @ 2016-09-06 21:06 UTC (permalink / raw)
  To: larsxschneider; +Cc: git, gitster, tboegi, Johannes.Schindelin

larsxschneider@gmail.com wrote:
>  static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
>  {
>  	int match = -1;
> -	int fd = open(ce->name, O_RDONLY);
> +	int fd = open(ce->name, O_RDONLY | O_CLOEXEC);
>  
>  	if (fd >= 0) {
>  		unsigned char sha1[20];

Also, this needs to check EINVAL when O_CLOEXEC != 0 the same
way create_tempfile currently does.  Somebody could be building
with modern headers but running an old kernel that doesn't
understand O_CLOEXEC.

There should probably be a open() wrapper for handling this case
since we're now up to 3 places where open(... O_CLOEXEC) is
used.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-06 11:38     ` Johannes Schindelin
@ 2016-09-07 13:20       ` Lars Schneider
  2016-09-07 18:17         ` Eric Wong
  0 siblings, 1 reply; 15+ messages in thread
From: Lars Schneider @ 2016-09-07 13:20 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Eric Wong, git, gitster, tboegi


> On 06 Sep 2016, at 13:38, Johannes Schindelin <johannes.schindelin@gmx.de> wrote:
> 
> Hi Eric & Lars,
> 
> On Mon, 5 Sep 2016, Eric Wong wrote:
> 
>> larsxschneider@gmail.com wrote:
>>> All processes that the Git main process spawns inherit the open file
>>> descriptors of the main process. These leaked file descriptors can
>>> cause problems.
>> 
>> 
>>> -int git_open_noatime(const char *name)
>>> +int git_open_noatime_cloexec(const char *name)
>>> {
>>> -	static int sha1_file_open_flag = O_NOATIME;
>>> +	static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
>>> 
>>> 	for (;;) {
>>> 		int fd;
> 
>> I question the need for the "_cloexec" suffixing in the
>> function name since the old function is going away entirely.
> 
> Me, too. While it is correct, it makes things harder to read, so it may
> even cause more harm than it does good.

What name would you suggest? Leaving the name as-is seems misleading to me.
Maybe just "git_open()" ?


>> I prefer all FD-creating functions set cloexec by default
>> for FD > 2 to avoid inadvertantly leaking FDs.  So we
>> ought to use pipe2, accept4, socket(..., SOCK_CLOEXEC), etc...
>> and fallback to the racy+slower F_SETFD when not available.
> 
> In the original Pull Request where the change was contributed to Git for
> Windows, this was tested (actually, the code did not see whether fd > 2,
> but simply assumed that all newly opened file descriptors would be > 2
> anyway), and it failed:
> 
> https://github.com/git-for-windows/git/pull/755#issuecomment-220247972
> 
> So it appears that we would have to exclude at least the code path to `git
> upload-pack` from that magic.


I just realized that Dscho improved his original patch in GfW with a
fallback if CLOEXEC is not present.

I applied the same mechanism here. Would that be OK?

Thanks,
Lars

-       static int sha1_file_open_flag = O_NOATIME;
+       static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;

        for (;;) {
                int fd;
@@ -1471,12 +1471,17 @@ int git_open_noatime(const char *name)
                if (fd >= 0)
                        return fd;

-               /* Might the failure be due to O_NOATIME? */
-               if (errno != ENOENT && sha1_file_open_flag) {
-                       sha1_file_open_flag = 0;
+               /* Try again w/o O_CLOEXEC: the kernel might not support it */
+               if (O_CLOEXEC && errno == EINVAL && (sha1_file_open_flag & O_CLOEXEC)) {
+                       sha1_file_open_flag &= ~O_CLOEXEC;
                        continue;
                }

+               /* Might the failure be due to O_NOATIME? */
+               if (errno != ENOENT && (sha1_file_open_flag & O_NOATIME)) {
+                       sha1_file_open_flag &= ~O_NOATIME;
+                       continue;
+               }


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-06 21:06   ` Eric Wong
@ 2016-09-07 13:39     ` Lars Schneider
  2016-09-07 18:10       ` Eric Wong
  0 siblings, 1 reply; 15+ messages in thread
From: Lars Schneider @ 2016-09-07 13:39 UTC (permalink / raw)
  To: Eric Wong; +Cc: Git Mailing List, gitster, tboegi, Johannes.Schindelin


> On 06 Sep 2016, at 23:06, Eric Wong <e@80x24.org> wrote:
> 
> larsxschneider@gmail.com wrote:
>> static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
>> {
>> 	int match = -1;
>> -	int fd = open(ce->name, O_RDONLY);
>> +	int fd = open(ce->name, O_RDONLY | O_CLOEXEC);
>> 
>> 	if (fd >= 0) {
>> 		unsigned char sha1[20];
> 
> Also, this needs to check EINVAL when O_CLOEXEC != 0 the same
> way create_tempfile currently does.  Somebody could be building
> with modern headers but running an old kernel that doesn't
> understand O_CLOEXEC.
> 
> There should probably be a open() wrapper for handling this case
> since we're now up to 3 places where open(... O_CLOEXEC) is
> used.

Right! Actually "sha1_file.c:git_open_noatime()" is already a wrapper, no?
Can't we use this here? The O_NOATIME flag shouldn't hurt, right?

Thanks,
Lars


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-07 13:39     ` Lars Schneider
@ 2016-09-07 18:10       ` Eric Wong
  2016-09-07 18:23         ` Junio C Hamano
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Wong @ 2016-09-07 18:10 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Git Mailing List, gitster, tboegi, Johannes.Schindelin

Lars Schneider <larsxschneider@gmail.com> wrote:
> > On 06 Sep 2016, at 23:06, Eric Wong <e@80x24.org> wrote:
> > larsxschneider@gmail.com wrote:
> >> static int ce_compare_data(const struct cache_entry *ce, struct stat *st)
> >> {
> >> 	int match = -1;
> >> -	int fd = open(ce->name, O_RDONLY);
> >> +	int fd = open(ce->name, O_RDONLY | O_CLOEXEC);
> >> 
> >> 	if (fd >= 0) {
> >> 		unsigned char sha1[20];
> > 
> > Also, this needs to check EINVAL when O_CLOEXEC != 0 the same
> > way create_tempfile currently does.  Somebody could be building
> > with modern headers but running an old kernel that doesn't
> > understand O_CLOEXEC.
> > 
> > There should probably be a open() wrapper for handling this case
> > since we're now up to 3 places where open(... O_CLOEXEC) is
> > used.
> 
> Right! Actually "sha1_file.c:git_open_noatime()" is already a wrapper, no?
> Can't we use this here? The O_NOATIME flag shouldn't hurt, right?

For ce_compare_data (and other O_RDONLY users), I guess
git_open_noatime is fine; and probably preferable because of
O_NOATIME.

We probably should be using O_NOATIME for all O_RDONLY cases
to get the last bit of performance out (especially since
non-modern-Linux systems probably still lack relatime).

However, create_tempfile needs O_RDWR|O_CREAT|O_EXCL
but I guess we can clean that up in another series.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC
  2016-09-07 13:20       ` Lars Schneider
@ 2016-09-07 18:17         ` Eric Wong
  0 siblings, 0 replies; 15+ messages in thread
From: Eric Wong @ 2016-09-07 18:17 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Johannes Schindelin, git, gitster, tboegi

Lars Schneider <larsxschneider@gmail.com> wrote:
> > On 06 Sep 2016, at 13:38, Johannes Schindelin <johannes.schindelin@gmx.de> wrote:
> > On Mon, 5 Sep 2016, Eric Wong wrote:
> >> larsxschneider@gmail.com wrote:
> >>> -int git_open_noatime(const char *name)
> >>> +int git_open_noatime_cloexec(const char *name)
> >>> {
> >>> -	static int sha1_file_open_flag = O_NOATIME;
> >>> +	static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
> >>> 
> >>> 	for (;;) {
> >>> 		int fd;
> > 
> >> I question the need for the "_cloexec" suffixing in the
> >> function name since the old function is going away entirely.
> > 
> > Me, too. While it is correct, it makes things harder to read, so it may
> > even cause more harm than it does good.
> 
> What name would you suggest? Leaving the name as-is seems misleading to me.
> Maybe just "git_open()" ?

Maybe "_noatime" is useful in some cases, but maybe not *shrug*

My original point for removing the "_cloexec" suffix was that
(at least for Perl and Ruby), cloexec-by-default was so prevalent
in FD-creating syscalls that having the suffix wasn't needed.

> >> I prefer all FD-creating functions set cloexec by default
> >> for FD > 2 to avoid inadvertantly leaking FDs.  So we
> >> ought to use pipe2, accept4, socket(..., SOCK_CLOEXEC), etc...
> >> and fallback to the racy+slower F_SETFD when not available.


> I applied the same mechanism here. Would that be OK?
> 
> Thanks,
> Lars
> 
> -       static int sha1_file_open_flag = O_NOATIME;
> +       static int sha1_file_open_flag = O_NOATIME | O_CLOEXEC;
> 
>         for (;;) {
>                 int fd;
> @@ -1471,12 +1471,17 @@ int git_open_noatime(const char *name)
>                 if (fd >= 0)
>                         return fd;
> 
> -               /* Might the failure be due to O_NOATIME? */
> -               if (errno != ENOENT && sha1_file_open_flag) {
> -                       sha1_file_open_flag = 0;
> +               /* Try again w/o O_CLOEXEC: the kernel might not support it */
> +               if (O_CLOEXEC && errno == EINVAL && (sha1_file_open_flag & O_CLOEXEC)) {

80 columns overflow

> +                       sha1_file_open_flag &= ~O_CLOEXEC;
>                         continue;
>                 }
> 
> +               /* Might the failure be due to O_NOATIME? */
> +               if (errno != ENOENT && (sha1_file_open_flag & O_NOATIME)) {
> +                       sha1_file_open_flag &= ~O_NOATIME;
> +                       continue;
> +               }

But otherwise much better since it doesn't blindly zero
sha1_file_open_flag :>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-07 18:10       ` Eric Wong
@ 2016-09-07 18:23         ` Junio C Hamano
  2016-09-08  5:57           ` Lars Schneider
  0 siblings, 1 reply; 15+ messages in thread
From: Junio C Hamano @ 2016-09-07 18:23 UTC (permalink / raw)
  To: Eric Wong; +Cc: Lars Schneider, Git Mailing List, tboegi, Johannes.Schindelin

Eric Wong <e@80x24.org> writes:

> We probably should be using O_NOATIME for all O_RDONLY cases
> to get the last bit of performance out (especially since
> non-modern-Linux systems probably still lack relatime).

No, please do not go there.

The user can read from a file in a working tree using "less",
"grep", etc., and they all update the atime, so should "git grep".
We do not use atime ourselves on these files but we should let
outside tools rely on the validity of atime (e.g. "what are the
files that were looked at yesterday?").

If you grep for noatime in our current codebase, you'd notice that
we use it only for files in objects/ hierarchy, and that makes very
good sense.  These files are what we create for our _sole_ use and
no other tools can peek at them and expect to get any useful
information out of them (we hear from time to time that virus
scanners leaving open file descriptors on them causing trouble, but
that is an example of a useless access), and that makes a file in
objects/ hierarchy a fair game for noatime optimization.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-07 18:23         ` Junio C Hamano
@ 2016-09-08  5:57           ` Lars Schneider
  2016-09-08 17:37             ` Junio C Hamano
  0 siblings, 1 reply; 15+ messages in thread
From: Lars Schneider @ 2016-09-08  5:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Eric Wong, Git Mailing List, tboegi, Johannes.Schindelin


> On 07 Sep 2016, at 20:23, Junio C Hamano <gitster@pobox.com> wrote:
> 
> Eric Wong <e@80x24.org> writes:
> 
>> We probably should be using O_NOATIME for all O_RDONLY cases
>> to get the last bit of performance out (especially since
>> non-modern-Linux systems probably still lack relatime).
> 
> No, please do not go there.
> 
> The user can read from a file in a working tree using "less",
> "grep", etc., and they all update the atime, so should "git grep".
> We do not use atime ourselves on these files but we should let
> outside tools rely on the validity of atime (e.g. "what are the
> files that were looked at yesterday?").
> 
> If you grep for noatime in our current codebase, you'd notice that
> we use it only for files in objects/ hierarchy, and that makes very
> good sense.  These files are what we create for our _sole_ use and
> no other tools can peek at them and expect to get any useful
> information out of them (we hear from time to time that virus
> scanners leaving open file descriptors on them causing trouble, but
> that is an example of a useless access), and that makes a file in
> objects/ hierarchy a fair game for noatime optimization.

How do we deal with read-cache:ce_compare_data, though?

By your definition above we shouldn't use NOATIME since the read file
is not in objects/. However, the file read is not something the user
explicitly triggers. The read is part of the Git internal "clean"
machinery.

What would you suggest? Should I open the file with or without NOATIME?

Thanks,
Lars

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes
  2016-09-08  5:57           ` Lars Schneider
@ 2016-09-08 17:37             ` Junio C Hamano
  0 siblings, 0 replies; 15+ messages in thread
From: Junio C Hamano @ 2016-09-08 17:37 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Eric Wong, Git Mailing List, tboegi, Johannes.Schindelin

Lars Schneider <larsxschneider@gmail.com> writes:

>>> We probably should be using O_NOATIME for all O_RDONLY cases
>>> to get the last bit of performance out (especially since
>>> non-modern-Linux systems probably still lack relatime).
>> 
>> No, please do not go there.
>> 
>> The user can read from a file in a working tree using "less",
>> "grep", etc., and they all update the atime, so should "git grep".
>> We do not use atime ourselves on these files but we should let
>> outside tools rely on the validity of atime (e.g. "what are the
>> files that were looked at yesterday?").
>> 
>> If you grep for noatime in our current codebase, you'd notice that
>> we use it only for files in objects/ hierarchy, and that makes very
>> good sense.  These files are what we create for our _sole_ use and
>> no other tools can peek at them and expect to get any useful
>> information out of them (we hear from time to time that virus
>> scanners leaving open file descriptors on them causing trouble, but
>> that is an example of a useless access), and that makes a file in
>> objects/ hierarchy a fair game for noatime optimization.
>
> How do we deal with read-cache:ce_compare_data, though?

We are using open() in our current code in that codepath, without
NOATIME.  We shouldn't start using git_open_noatime() merely for
convenience.

We would probably want to do a preliminary refactoring so that we
can say "we want only CLOEXEC and not NOATIME".  Perhaps we would
want to make the existing one in sha1_file.c to something like:

	int git_open_noatime(const char *name)
        {
		return git_open(name, GIT_OPEN_NOATIME);
	}

and then add

	#define GIT_OPEN_NOATIME 01

to cache.h and add

	int git_open(const char *name, unsigned int flag)
        {
        	static int open_noatime = O_NOATIME;

		for (;;) {
                	int fd, open_flag;

                        open_flag = 0;
                        if (flag & GIT_OPEN_NOATIME)
                                open_flag |= open_noatime;

			errno = 0;
                        fd = open(name, O_RDONLY | open_flag);
                        if (fd >= 0)
                        	return fd;

			if (errno == ENOENT || !open_flag)
				return -1;

			/* The failure may be due to additional	flags */
                       	if ((flag & GIT_OPEN_NOATIME) &&
			    (open_flag & O_NOATIME)) {
				flag &= ~GIT_OPEN_NOATIME;
				open_noatime = 0;
			}
		}
	}

to wrapper.c in the first step, which is a "no-op refactoring" step.

Then add

	#define GIT_OPEN_CLOEXEC 02

and update git_open(), perhaps like so:

	int git_open(const char *name, unsigned int flag)
        {
        	static int open_noatime = O_NOATIME;
        	static int open_cloexec = O_CLOEXEC;

		for (;;) {
                	int fd, open_flag;

                        open_flag = 0;
                        if (flag & GIT_OPEN_NOATIME)
                                open_flag |= open_noatime;
                        if (flag & GIT_OPEN_CLOEXEC)
                                open_flag |= open_cloexec;

			errno = 0;
                        fd = open(name, O_RDONLY | open_flag);
                        if (fd >= 0)
                        	return fd;

			if (errno == ENOENT || !open_flag)
				return -1;

			/* The failure may be due to additional	flags */
                       	if ((flag & GIT_OPEN_NOATIME) &&
			    (open_flag & O_NOATIME)) {
				flag &= ~GIT_OPEN_NOATIME;
                                open_noatime = 0;
			}
                       	if ((flag & GIT_OPEN_CLOEXEC) &&
			    (open_flag & O_CLOEXEC)) {
				flag &= ~GIT_OPEN_CLOEXEC;
                                open_cloexec = 0;
			}
		}
	}

The retry logic is "if we were asked to do this flag, and if we did
pass that flag, then we know open() with that flag fails here, so we
won't waste time trying with it again", which came from the NOATIME
codepath we already have, but it may not match what we use CLOEXEC
for and may need to be adjusted.  I didn't think that part of the
code through.

Then the ce codepath that reads from the working tree would use

	git_open(ce->name, GIT_OPEN_CLOEXEC);

to obtain the file descriptor for reading, perhaps?

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-09-08 17:38 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-05 21:11 [PATCH v1 0/2] Use CLOEXEC to avoid fd leaks larsxschneider
2016-09-05 21:11 ` [PATCH v1 1/2] sha1_file: open window into packfiles with CLOEXEC larsxschneider
2016-09-05 22:27   ` Eric Wong
2016-09-06  9:36     ` Jakub Narębski
2016-09-06 11:38     ` Johannes Schindelin
2016-09-07 13:20       ` Lars Schneider
2016-09-07 18:17         ` Eric Wong
2016-09-05 21:11 ` [PATCH v1 2/2] read-cache: make sure file handles are not inherited by child processes larsxschneider
2016-09-06 11:41   ` Johannes Schindelin
2016-09-06 21:06   ` Eric Wong
2016-09-07 13:39     ` Lars Schneider
2016-09-07 18:10       ` Eric Wong
2016-09-07 18:23         ` Junio C Hamano
2016-09-08  5:57           ` Lars Schneider
2016-09-08 17:37             ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.