linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] statx: optimize copy of struct statx to userspace
@ 2017-03-11 21:45 Eric Biggers
  2017-03-12  1:24 ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Biggers @ 2017-03-11 21:45 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, David Howells, linux-kernel, Eric Biggers

From: Eric Biggers <ebiggers@google.com>

I found that statx() was significantly slower than stat().  As a
microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
file to the same with statx() passed a NULL path:

	$ time ./stat_benchmark

	real	0m1.464s
	user	0m0.275s
	sys	0m1.187s

	$ time ./statx_benchmark

	real	0m5.530s
	user	0m0.281s
	sys	0m5.247s

statx is expected to be a little slower than stat because struct statx
is larger than struct stat, but not by *that* much.  It turns out that
most of the overhead was in copying struct statx to userspace,
apparently mostly in all the stac/clac instructions that got generated
for each __put_user() call.  (This was on x86_64, but some other
architectures, e.g. arm64, have something similar now too.)

stat() instead initializes its struct on the stack and copies it to
userspace with a single call to copy_to_user().  This turns out to be
much faster, and changing statx to do this makes it almost as fast as
stat:

	$ time ./statx_benchmark

	real	0m1.573s
	user	0m0.229s
	sys	0m1.344s

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/stat.c | 72 +++++++++++++++++++++++++++++----------------------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index fa0be59340cc..5cc267ec7865 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -509,46 +509,41 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
+static inline void init_statx_timestamp(struct statx_timestamp *uts,
+					const struct timespec *kts)
 {
-	return (__put_user(kts->tv_sec,		&uts->tv_sec		) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec		) ||
-		__put_user(0,			&uts->__reserved	));
+	uts->tv_sec = kts->tv_sec;
+	uts->tv_nsec = kts->tv_nsec;
+	uts->__reserved = 0;
 }
 
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
+static int cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
-
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0))	  ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1))	  ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
-
-	return 0;
+	struct statx tmp;
+
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	memset(tmp.__spare0, 0, sizeof(tmp.__spare0));
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	memset(tmp.__spare1, 0, sizeof(tmp.__spare1));
+	init_statx_timestamp(&tmp.stx_atime, &stat->atime);
+	init_statx_timestamp(&tmp.stx_btime, &stat->btime);
+	init_statx_timestamp(&tmp.stx_ctime, &stat->ctime);
+	init_statx_timestamp(&tmp.stx_mtime, &stat->mtime);
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
+	memset(tmp.__spare2, 0, sizeof(tmp.__spare2));
+
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
@@ -572,8 +567,6 @@ SYSCALL_DEFINE5(statx,
 
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
@@ -581,7 +574,8 @@ SYSCALL_DEFINE5(statx,
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-03-13 18:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-11 21:45 [PATCH v2] statx: optimize copy of struct statx to userspace Eric Biggers
2017-03-12  1:24 ` Al Viro
2017-03-12  2:16   ` Eric Biggers
2017-03-12  2:29     ` Al Viro
2017-03-12  4:02       ` Eric Biggers
2017-03-12  6:01         ` Eric Biggers
2017-03-13  4:34           ` Andreas Dilger
2017-03-13 10:27             ` Florian Weimer
2017-03-13 18:11               ` Eric Biggers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).