linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] statx: optimize copy of struct statx to userspace
@ 2017-03-11 21:45 Eric Biggers
  2017-03-12  1:24 ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2017-03-11 21:45 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, David Howells, linux-kernel, Eric Biggers

From: Eric Biggers <ebiggers@google.com>

I found that statx() was significantly slower than stat().  As a
microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
file to the same with statx() passed a NULL path:

	$ time ./stat_benchmark

	real	0m1.464s
	user	0m0.275s
	sys	0m1.187s

	$ time ./statx_benchmark

	real	0m5.530s
	user	0m0.281s
	sys	0m5.247s

statx is expected to be a little slower than stat because struct statx
is larger than struct stat, but not by *that* much.  It turns out that
most of the overhead was in copying struct statx to userspace,
apparently mostly in all the stac/clac instructions that got generated
for each __put_user() call.  (This was on x86_64, but some other
architectures, e.g. arm64, have something similar now too.)

stat() instead initializes its struct on the stack and copies it to
userspace with a single call to copy_to_user().  This turns out to be
much faster, and changing statx to do this makes it almost as fast as
stat:

	$ time ./statx_benchmark

	real	0m1.573s
	user	0m0.229s
	sys	0m1.344s

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/stat.c | 72 +++++++++++++++++++++++++++++----------------------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index fa0be59340cc..5cc267ec7865 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -509,46 +509,41 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
+static inline void init_statx_timestamp(struct statx_timestamp *uts,
+					const struct timespec *kts)
 {
-	return (__put_user(kts->tv_sec,		&uts->tv_sec		) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec		) ||
-		__put_user(0,			&uts->__reserved	));
+	uts->tv_sec = kts->tv_sec;
+	uts->tv_nsec = kts->tv_nsec;
+	uts->__reserved = 0;
 }
 
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
+static int cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
-
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0))	  ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1))	  ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
-
-	return 0;
+	struct statx tmp;
+
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	memset(tmp.__spare0, 0, sizeof(tmp.__spare0));
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	memset(tmp.__spare1, 0, sizeof(tmp.__spare1));
+	init_statx_timestamp(&tmp.stx_atime, &stat->atime);
+	init_statx_timestamp(&tmp.stx_btime, &stat->btime);
+	init_statx_timestamp(&tmp.stx_ctime, &stat->ctime);
+	init_statx_timestamp(&tmp.stx_mtime, &stat->mtime);
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
+	memset(tmp.__spare2, 0, sizeof(tmp.__spare2));
+
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
@@ -572,8 +567,6 @@ SYSCALL_DEFINE5(statx,
 
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
@@ -581,7 +574,8 @@ SYSCALL_DEFINE5(statx,
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-11 21:45 [PATCH v2] statx: optimize copy of struct statx to userspace Eric Biggers
@ 2017-03-12  1:24 ` Al Viro
  2017-03-12  2:16   ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2017-03-12  1:24 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-fsdevel, David Howells, linux-kernel, Eric Biggers

On Sat, Mar 11, 2017 at 01:45:55PM -0800, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> I found that statx() was significantly slower than stat().  As a
> microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
> file to the same with statx() passed a NULL path:

Umm...

> +	struct statx tmp;
> +
> +	tmp.stx_mask = stat->result_mask;
> +	tmp.stx_blksize = stat->blksize;
> +	tmp.stx_attributes = stat->attributes;
> +	tmp.stx_nlink = stat->nlink;
> +	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
> +	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
> +	tmp.stx_mode = stat->mode;
> +	memset(tmp.__spare0, 0, sizeof(tmp.__spare0));
> +	tmp.stx_ino = stat->ino;
> +	tmp.stx_size = stat->size;
> +	tmp.stx_blocks = stat->blocks;
> +	memset(tmp.__spare1, 0, sizeof(tmp.__spare1));
> +	init_statx_timestamp(&tmp.stx_atime, &stat->atime);
> +	init_statx_timestamp(&tmp.stx_btime, &stat->btime);
> +	init_statx_timestamp(&tmp.stx_ctime, &stat->ctime);
> +	init_statx_timestamp(&tmp.stx_mtime, &stat->mtime);
> +	tmp.stx_rdev_major = MAJOR(stat->rdev);
> +	tmp.stx_rdev_minor = MINOR(stat->rdev);
> +	tmp.stx_dev_major = MAJOR(stat->dev);
> +	tmp.stx_dev_minor = MINOR(stat->dev);
> +	memset(tmp.__spare2, 0, sizeof(tmp.__spare2));
> +
> +	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;

That relies upon there being no padding in the damn structure.
It's true and probably will be true on any target, but
	a) it's bloody well worth stating explicitly
and
	b) struct statx tmp = {.stx_mask = stat->result_mask};
will get rid of those memset() you've got there by implicit
zeroing of fields missing in partial structure initializer.
Padding is *not* included into that, but you are relying upon
having no padding anyway...

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-12  1:24 ` Al Viro
@ 2017-03-12  2:16   ` Eric Biggers
  2017-03-12  2:29     ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2017-03-12  2:16 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, David Howells, linux-kernel, Eric Biggers

Hi Al,

On Sun, Mar 12, 2017 at 01:24:15AM +0000, Al Viro wrote:
> On Sat, Mar 11, 2017 at 01:45:55PM -0800, Eric Biggers wrote:
> > From: Eric Biggers <ebiggers@google.com>
> > 
> > I found that statx() was significantly slower than stat().  As a
> > microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
> > file to the same with statx() passed a NULL path:
> 
> Umm...
> 

Well, it's a silly benchmark, but stat performance is important, and usually
things are cached already so most of the time is just overhead --- which this
measures.  And since nothing actually uses statx() yet, you can't do a benchmark
just by running some command like 'git status' or whatever.

> > +	tmp.stx_dev_major = MAJOR(stat->dev);
> > +	tmp.stx_dev_minor = MINOR(stat->dev);
> > +	memset(tmp.__spare2, 0, sizeof(tmp.__spare2));
> > +
> > +	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> 
> That relies upon there being no padding in the damn structure.
> It's true and probably will be true on any target, but
> 	a) it's bloody well worth stating explicitly
> and
> 	b) struct statx tmp = {.stx_mask = stat->result_mask};
> will get rid of those memset() you've got there by implicit
> zeroing of fields missing in partial structure initializer.
> Padding is *not* included into that, but you are relying upon
> having no padding anyway...

If padding is a concern at all (AFAICS it's not actually an issue now with
struct statx, but people tend to have different opinions on how careful they
want to be with padding), then I think we'll just have to start by memsetting
the whole struct to 0.

stat() actually does that, except that it's overridden on some architectures,
like x86, to only memset the actual reserved fields.  I had kind of assumed that
as long as we're defining a new struct that has the same layout on all
architectures, with no padding, that we'd want to take the opportunity to only
memset the actual reserved fields, to make it a bit faster.  And yes, a
designated initializer would make this look a bit nicer, though it might have to
be changed later if any future statx() extensions involve fields that are
unioned or conditionally written.

Note that another approach would be to copy the fields individually, but with
unsafe_put_user() instead of __put_user().  That would avoid the overhead of
user_access_begin()/user_access_end() which was the main performance problem.
However, the infrastructure around this doesn't seem to be ready quite yet:
there aren't good implementations of unsafe_put_user() yet, nor is there an
unsafe_clear_user() yet.

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-12  2:16   ` Eric Biggers
@ 2017-03-12  2:29     ` Al Viro
  2017-03-12  4:02       ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2017-03-12  2:29 UTC (permalink / raw)
  To: Eric Biggers; +Cc: linux-fsdevel, David Howells, linux-kernel, Eric Biggers

On Sat, Mar 11, 2017 at 06:16:55PM -0800, Eric Biggers wrote:
> Hi Al,
> 
> On Sun, Mar 12, 2017 at 01:24:15AM +0000, Al Viro wrote:
> > On Sat, Mar 11, 2017 at 01:45:55PM -0800, Eric Biggers wrote:
> > > From: Eric Biggers <ebiggers@google.com>
> > > 
> > > I found that statx() was significantly slower than stat().  As a
> > > microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
> > > file to the same with statx() passed a NULL path:
> > 
> > Umm...
> > 
> 
> Well, it's a silly benchmark, but stat performance is important, and usually
> things are cached already so most of the time is just overhead --- which this
> measures.  And since nothing actually uses statx() yet, you can't do a benchmark
> just by running some command like 'git status' or whatever.

Oh, I agree that multiple __put_user() are wrong; I also agree that bulk copy is
the right approach (when we get the unsafe stuff right, we can revisit that, but
I suspect that on quite a few architectures a bulk copy will still give better
time, no matter what).

> If padding is a concern at all (AFAICS it's not actually an issue now with
> struct statx, but people tend to have different opinions on how careful they
> want to be with padding), then I think we'll just have to start by memsetting
> the whole struct to 0.

My point is simply that it's worth a comment in that code.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-12  2:29     ` Al Viro
@ 2017-03-12  4:02       ` Eric Biggers
  2017-03-12  6:01         ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2017-03-12  4:02 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, David Howells, linux-kernel, Eric Biggers

On Sun, Mar 12, 2017 at 02:29:27AM +0000, Al Viro wrote:
> 
> Oh, I agree that multiple __put_user() are wrong; I also agree that bulk copy is
> the right approach (when we get the unsafe stuff right, we can revisit that, but
> I suspect that on quite a few architectures a bulk copy will still give better
> time, no matter what).
> 
> > If padding is a concern at all (AFAICS it's not actually an issue now with
> > struct statx, but people tend to have different opinions on how careful they
> > want to be with padding), then I think we'll just have to start by memsetting
> > the whole struct to 0.
> 
> My point is simply that it's worth a comment in that code.

Okay, thanks.  I'll add a comment about the padding assumption, and I think I'll
take the suggestion to use a designated initializer.  Then at least all *fields*
get initialized by default.  And if in the future someone wants to conditionally
initialize fields, then they can use ?: or they can do it after the initializer.
Either way, at least they won't be able to forget to zero some field.

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-12  4:02       ` Eric Biggers
@ 2017-03-12  6:01         ` Eric Biggers
  2017-03-13  4:34           ` Andreas Dilger
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2017-03-12  6:01 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, David Howells, linux-kernel, Eric Biggers

On Sat, Mar 11, 2017 at 08:02:06PM -0800, Eric Biggers wrote:
> On Sun, Mar 12, 2017 at 02:29:27AM +0000, Al Viro wrote:
> > 
> > Oh, I agree that multiple __put_user() are wrong; I also agree that bulk copy is
> > the right approach (when we get the unsafe stuff right, we can revisit that, but
> > I suspect that on quite a few architectures a bulk copy will still give better
> > time, no matter what).
> > 
> > > If padding is a concern at all (AFAICS it's not actually an issue now with
> > > struct statx, but people tend to have different opinions on how careful they
> > > want to be with padding), then I think we'll just have to start by memsetting
> > > the whole struct to 0.
> > 
> > My point is simply that it's worth a comment in that code.
> 
> Okay, thanks.  I'll add a comment about the padding assumption, and I think I'll
> take the suggestion to use a designated initializer.  Then at least all *fields*
> get initialized by default.  And if in the future someone wants to conditionally
> initialize fields, then they can use ?: or they can do it after the initializer.
> Either way, at least they won't be able to forget to zero some field.
> 
> - Eric

Okay, well, I may have changed my mind again...  I tried the designated
initializer on x86_64 with gcc 4.8 and 6.3, and also on arm64 with gcc 4.8.  In
each case, it was compiled into first zeroing all 256 bytes of the struct, just
like memset(&tmp, 0, sizeof(tmp)).  Yes, this was with
CC_OPTIMIZE_FOR_PERFORMANCE=y.  So I think we might as well just write the full
memset(), making it completely clear that everything is initialized.  (This is
especially useful for people who are auditing code paths like this for
information leaks.)  Also, a smart compiler could potentially optimize away
parts of the memset() anyway...

Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-12  6:01         ` Eric Biggers
@ 2017-03-13  4:34           ` Andreas Dilger
  2017-03-13 10:27             ` Florian Weimer
  0 siblings, 1 reply; 9+ messages in thread
From: Andreas Dilger @ 2017-03-13  4:34 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Al Viro, linux-fsdevel, David Howells, linux-kernel, Eric Biggers

[-- Attachment #1: Type: text/plain, Size: 3707 bytes --]

On Mar 11, 2017, at 11:01 PM, Eric Biggers <ebiggers3@gmail.com> wrote:
> 
> On Sat, Mar 11, 2017 at 08:02:06PM -0800, Eric Biggers wrote:
>> On Sun, Mar 12, 2017 at 02:29:27AM +0000, Al Viro wrote:
>>> 
>>> Oh, I agree that multiple __put_user() are wrong; I also agree that bulk
>>> copy is the right approach (when we get the unsafe stuff right, we can
>>> revisit that, but I suspect that on quite a few architectures a bulk copy
>>> will still give better time, no matter what).
>>> 
>>>> If padding is a concern at all (AFAICS it's not actually an issue now
>>>> with struct statx, but people tend to have different opinions on how
>>>> careful they want to be with padding), then I think we'll just have to
>>>> start by memsetting the whole struct to 0.
>>> 
>>> My point is simply that it's worth a comment in that code.
>> 
>> Okay, thanks.  I'll add a comment about the padding assumption, and I think
>> I'll take the suggestion to use a designated initializer.  Then at least
>> all *fields* get initialized by default.  And if in the future someone
>> wants to conditionally initialize fields, then they can use ?: or they can
>> do it after the initializer.  Either way, at least they won't be able to
>> forget to zero some field.
> 
> Okay, well, I may have changed my mind again...  I tried the designated
> initializer on x86_64 with gcc 4.8 and 6.3, and also on arm64 with gcc 4.8.
> In each case, it was compiled into first zeroing all 256 bytes of the struct,
> just like memset(&tmp, 0, sizeof(tmp)).  Yes, this was with
> CC_OPTIMIZE_FOR_PERFORMANCE=y.  So I think we might as well just write the
> full memset(), making it completely clear that everything is initialized.
> (This is especially useful for people who are auditing code paths like this
> for information leaks.)  Also, a smart compiler could potentially optimize
> away parts of the memset() anyway...

Not that it is a huge deal either way, but I'd think it is harder for the
compiler to optimize across a function call boundary like memset() vs. a
struct initialization in the same function where it can see that all but
a few of the fields are being overwritten immediately before they are used.

I don't think the designated initializer is any less clear to the reader
that the struct is zeroed out compared to using memset().  Possibly the
best compromise is to use a designated initializer that specifies all of
the known fields, and leaves it to the compiler to initialize unset fields
or padding.  That avoids double zeroing without any risk of exposing unset
fields to userspace:

static int cp_statx(const struct kstat *stat, struct statx __user *buffer)
{
	struct statx tmp = {
		.stx_mask = stat->result_mask;
		.stx_blksize = stat->blksize;
		.stx_attributes = stat->attributes;
		.stx_nlink = stat->nlink;
		.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
		.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
		.stx_mode = stat->mode;
		.stx_ino = stat->ino;
		.stx_size = stat->size;
		.stx_blocks = stat->blocks;
		.stx_atime.tv_sec = stat->atime.tv_sec;
		.stx_atime.tv_nsec = stat->atime.tv_nsec;
		.stx_btime.tv_sec = stat->btime.tv_sec;
		.stx_btime.tv_nsec = stat->btime.tv_nsec;
		.stx_ctime.tv_sec = stat->ctime.tv_sec;
		.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
		.stx_mtime.tv_sec = stat->mtime.tv_sec;
		.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
		.stx_rdev_major = MAJOR(stat->rdev);
		.stx_rdev_minor = MINOR(stat->rdev);
		.stx_dev_major = MAJOR(stat->dev);
		.stx_dev_minor = MINOR(stat->dev);
	};

	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}

Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-13  4:34           ` Andreas Dilger
@ 2017-03-13 10:27             ` Florian Weimer
  2017-03-13 18:11               ` Eric Biggers
  0 siblings, 1 reply; 9+ messages in thread
From: Florian Weimer @ 2017-03-13 10:27 UTC (permalink / raw)
  To: Andreas Dilger, Eric Biggers
  Cc: Al Viro, linux-fsdevel, David Howells, linux-kernel, Eric Biggers

On 03/13/2017 05:34 AM, Andreas Dilger wrote:
> Not that it is a huge deal either way, but I'd think it is harder for the
> compiler to optimize across a function call boundary like memset() vs. a
> struct initialization in the same function where it can see that all but
> a few of the fields are being overwritten immediately before they are used.

GCC treats memset as a function call only if options such as 
-ffreestanding or -fno-builtin are enabled, or if memset is redefined in 
a header file.  Does the kernel do this?

> I don't think the designated initializer is any less clear to the reader
> that the struct is zeroed out compared to using memset().  Possibly the
> best compromise is to use a designated initializer that specifies all of
> the known fields, and leaves it to the compiler to initialize unset fields
> or padding.

GCC will not always initialize padding if you specify a designated 
initializer because padding values are unspeficied and their value does 
not matter from a language point of view.

Thanks,
Florian

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] statx: optimize copy of struct statx to userspace
  2017-03-13 10:27             ` Florian Weimer
@ 2017-03-13 18:11               ` Eric Biggers
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Biggers @ 2017-03-13 18:11 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Andreas Dilger, Al Viro, linux-fsdevel, David Howells,
	linux-kernel, Eric Biggers

On Mon, Mar 13, 2017 at 11:27:32AM +0100, Florian Weimer wrote:
> On 03/13/2017 05:34 AM, Andreas Dilger wrote:
> >Not that it is a huge deal either way, but I'd think it is harder for the
> >compiler to optimize across a function call boundary like memset() vs. a
> >struct initialization in the same function where it can see that all but
> >a few of the fields are being overwritten immediately before they are used.
> 
> GCC treats memset as a function call only if options such as
> -ffreestanding or -fno-builtin are enabled, or if memset is
> redefined in a header file.  Does the kernel do this?
> 

No, it does not.  So gcc treats memset() as a request to clear memory, not as a
request to call a function called "memset()" specifically.  On x86_64 it's
compiling it into a "rep stos" instruction.

- Eric

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-03-13 18:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-11 21:45 [PATCH v2] statx: optimize copy of struct statx to userspace Eric Biggers
2017-03-12  1:24 ` Al Viro
2017-03-12  2:16   ` Eric Biggers
2017-03-12  2:29     ` Al Viro
2017-03-12  4:02       ` Eric Biggers
2017-03-12  6:01         ` Eric Biggers
2017-03-13  4:34           ` Andreas Dilger
2017-03-13 10:27             ` Florian Weimer
2017-03-13 18:11               ` Eric Biggers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).