v1 -> v2 - Separate statx and open in io_kiocb - Remove external declarations for unused statx interfaces This patch set is a fix for the liburing statx test failure. The test fails with a "Miscompare between io_uring and statx" error because the statx system call path has additional processing in vfs_statx(): stat->result_mask |= STATX_MNT_ID; if (path.mnt->mnt_root == path.dentry) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; which then results in different result_mask values. Allowing the system call to be invoked directly simplifies the io_uring interface and avoids potential future incompatibilities. I'm not sure if there was other reasoning fort not doing so initially. One issue I cannot account for is the difference in "used" memory reported by free(1) after running the statx a large (10000) number of times. The difference is significant ~100k and doesn't really change after dropping caches. I enabled memory leak detection and couldn't see anything related to the test. Bijan Mottahedeh (4): io_uring: add io_statx structure statx: allow system call to be invoked from io_uring io_uring: call statx directly statx: hide interfaces no longer used by io_uring fs/internal.h | 4 ++-- fs/io_uring.c | 72 +++++++++++++++-------------------------------------------- fs/stat.c | 37 +++++++++++++++++------------- 3 files changed, 42 insertions(+), 71 deletions(-) -- 1.8.3.1
Separate statx data from open in io_kiocb. No functional changes. Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com> --- fs/io_uring.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 654e1c7..fba0ddb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -424,11 +424,7 @@ struct io_sr_msg { struct io_open { struct file *file; int dfd; - union { - unsigned mask; - }; struct filename *filename; - struct statx __user *buffer; struct open_how how; unsigned long nofile; }; @@ -480,6 +476,15 @@ struct io_provide_buf { __u16 bid; }; +struct io_statx { + struct file *file; + int dfd; + unsigned int mask; + unsigned int flags; + struct filename *filename; + struct statx __user *buffer; +}; + struct io_async_connect { struct sockaddr_storage address; }; @@ -621,6 +626,7 @@ struct io_kiocb { struct io_epoll epoll; struct io_splice splice; struct io_provide_buf pbuf; + struct io_statx statx; }; struct io_async_ctx *io; @@ -3379,19 +3385,19 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->flags & REQ_F_NEED_CLEANUP) return 0; - req->open.dfd = READ_ONCE(sqe->fd); - req->open.mask = READ_ONCE(sqe->len); + req->statx.dfd = READ_ONCE(sqe->fd); + req->statx.mask = READ_ONCE(sqe->len); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - req->open.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - req->open.how.flags = READ_ONCE(sqe->statx_flags); + req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + req->statx.flags = READ_ONCE(sqe->statx_flags); - if (vfs_stat_set_lookup_flags(&lookup_flags, req->open.how.flags)) + if (vfs_stat_set_lookup_flags(&lookup_flags, req->statx.flags)) return -EINVAL; - req->open.filename = getname_flags(fname, lookup_flags, NULL); - if (IS_ERR(req->open.filename)) { - ret = PTR_ERR(req->open.filename); - req->open.filename = NULL; + req->statx.filename = getname_flags(fname, lookup_flags, NULL); + if (IS_ERR(req->statx.filename)) { + ret = PTR_ERR(req->statx.filename); + req->statx.filename = NULL; return ret; } @@ -3401,7 +3407,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static int io_statx(struct io_kiocb *req, bool force_nonblock) { - struct io_open *ctx = &req->open; + struct io_statx *ctx = &req->statx; unsigned lookup_flags; struct path path; struct kstat stat; @@ -3414,7 +3420,7 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) return -EAGAIN; } - if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->how.flags)) + if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->flags)) return -EINVAL; retry: @@ -3426,7 +3432,7 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) if (ret) goto err; - ret = vfs_getattr(&path, &stat, ctx->mask, ctx->how.flags); + ret = vfs_getattr(&path, &stat, ctx->mask, ctx->flags); path_put(&path); if (retry_estale(ret, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; -- 1.8.3.1
This is a prepatory patch to allow io_uring to invoke statx directly. Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com> --- fs/internal.h | 2 ++ fs/stat.c | 32 +++++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index c9ff00f..614a559 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -201,3 +201,5 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, */ unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags); int cp_statx(const struct kstat *stat, struct statx __user *buffer); +int do_statx(int dfd, const char __user *filename, unsigned flags, + unsigned int mask, struct statx __user *buffer); diff --git a/fs/stat.c b/fs/stat.c index 3213d1b..bc5d2e81 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -577,6 +577,24 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } +int do_statx(int dfd, const char __user *filename, unsigned flags, + unsigned int mask, struct statx __user *buffer) +{ + struct kstat stat; + int error; + + if (mask & STATX__RESERVED) + return -EINVAL; + if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) + return -EINVAL; + + error = vfs_statx(dfd, filename, flags, &stat, mask); + if (error) + return error; + + return cp_statx(&stat, buffer); +} + /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. @@ -593,19 +611,7 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) unsigned int, mask, struct statx __user *, buffer) { - struct kstat stat; - int error; - - if (mask & STATX__RESERVED) - return -EINVAL; - if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) - return -EINVAL; - - error = vfs_statx(dfd, filename, flags, &stat, mask); - if (error) - return error; - - return cp_statx(&stat, buffer); + return do_statx(dfd, filename, flags, mask, buffer); } #ifdef CONFIG_COMPAT -- 1.8.3.1
Calling statx directly both simplifies the interface and avoids potential incompatibilities between sync and async invokations. Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com> --- fs/io_uring.c | 50 ++++---------------------------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fba0ddb..e068ee5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -481,7 +481,7 @@ struct io_statx { int dfd; unsigned int mask; unsigned int flags; - struct filename *filename; + const char __user *filename; struct statx __user *buffer; }; @@ -3374,43 +3374,23 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - const char __user *fname; - unsigned lookup_flags; - int ret; - if (sqe->ioprio || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) return -EBADF; - if (req->flags & REQ_F_NEED_CLEANUP) - return 0; req->statx.dfd = READ_ONCE(sqe->fd); req->statx.mask = READ_ONCE(sqe->len); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr)); req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); req->statx.flags = READ_ONCE(sqe->statx_flags); - if (vfs_stat_set_lookup_flags(&lookup_flags, req->statx.flags)) - return -EINVAL; - - req->statx.filename = getname_flags(fname, lookup_flags, NULL); - if (IS_ERR(req->statx.filename)) { - ret = PTR_ERR(req->statx.filename); - req->statx.filename = NULL; - return ret; - } - - req->flags |= REQ_F_NEED_CLEANUP; return 0; } static int io_statx(struct io_kiocb *req, bool force_nonblock) { struct io_statx *ctx = &req->statx; - unsigned lookup_flags; - struct path path; - struct kstat stat; int ret; if (force_nonblock) { @@ -3420,29 +3400,9 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) return -EAGAIN; } - if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->flags)) - return -EINVAL; - -retry: - /* filename_lookup() drops it, keep a reference */ - ctx->filename->refcnt++; - - ret = filename_lookup(ctx->dfd, ctx->filename, lookup_flags, &path, - NULL); - if (ret) - goto err; + ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, + ctx->buffer); - ret = vfs_getattr(&path, &stat, ctx->mask, ctx->flags); - path_put(&path); - if (retry_estale(ret, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - if (!ret) - ret = cp_statx(&stat, ctx->buffer); -err: - putname(ctx->filename); - req->flags &= ~REQ_F_NEED_CLEANUP; if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); @@ -5204,8 +5164,6 @@ static void io_cleanup_req(struct io_kiocb *req) break; case IORING_OP_OPENAT: case IORING_OP_OPENAT2: - case IORING_OP_STATX: - putname(req->open.filename); break; case IORING_OP_SPLICE: case IORING_OP_TEE: -- 1.8.3.1
The io_uring interfaces have been replaced by do_statx() and are no longer needed. Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com> --- fs/internal.h | 2 -- fs/stat.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 614a559..fcb47cc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -199,7 +199,5 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, /* * fs/stat.c: */ -unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags); -int cp_statx(const struct kstat *stat, struct statx __user *buffer); int do_statx(int dfd, const char __user *filename, unsigned flags, unsigned int mask, struct statx __user *buffer); diff --git a/fs/stat.c b/fs/stat.c index bc5d2e81..44f8ad3 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -156,7 +156,8 @@ int vfs_statx_fd(unsigned int fd, struct kstat *stat, } EXPORT_SYMBOL(vfs_statx_fd); -inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags) +static inline unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, + int flags) { if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0) @@ -542,7 +543,7 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ -noinline_for_stack int +static noinline_for_stack int cp_statx(const struct kstat *stat, struct statx __user *buffer) { struct statx tmp; -- 1.8.3.1
On 5/22/20 10:31 PM, Bijan Mottahedeh wrote:
> v1 -> v2
>
> - Separate statx and open in io_kiocb
> - Remove external declarations for unused statx interfaces
>
> This patch set is a fix for the liburing statx test failure.
>
> The test fails with a "Miscompare between io_uring and statx" error
> because the statx system call path has additional processing in vfs_statx():
>
> stat->result_mask |= STATX_MNT_ID;
> if (path.mnt->mnt_root == path.dentry)
> stat->attributes |= STATX_ATTR_MOUNT_ROOT;
> stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
>
> which then results in different result_mask values.
>
> Allowing the system call to be invoked directly simplifies the io_uring
> interface and avoids potential future incompatibilities. I'm not sure
> if there was other reasoning fort not doing so initially.
>
> One issue I cannot account for is the difference in "used" memory reported
> by free(1) after running the statx a large (10000) number of times.
>
> The difference is significant ~100k and doesn't really change after
> dropping caches.
>
> I enabled memory leak detection and couldn't see anything related to the test.
>
> Bijan Mottahedeh (4):
> io_uring: add io_statx structure
> statx: allow system call to be invoked from io_uring
> io_uring: call statx directly
> statx: hide interfaces no longer used by io_uring
>
> fs/internal.h | 4 ++--
> fs/io_uring.c | 72 +++++++++++++++--------------------------------------------
> fs/stat.c | 37 +++++++++++++++++-------------
> 3 files changed, 42 insertions(+), 71 deletions(-)
Thanks, this looks better. For a bit of history, the initial attempt was
to do the statx without async offload if we could do so without blocking.
Without that, we may as well simplify it.
--
Jens Axboe
On Tue, May 26 2020 at 16:59:23 -0600, Jens Axboe quoth thus: > On 5/22/20 10:31 PM, Bijan Mottahedeh wrote: > > v1 -> v2 > > > > - Separate statx and open in io_kiocb > > - Remove external declarations for unused statx interfaces > > > > This patch set is a fix for the liburing statx test failure. > > > > The test fails with a "Miscompare between io_uring and statx" error > > because the statx system call path has additional processing in vfs_statx(): > > > > stat->result_mask |= STATX_MNT_ID; > > if (path.mnt->mnt_root == path.dentry) > > stat->attributes |= STATX_ATTR_MOUNT_ROOT; > > stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; > > > > which then results in different result_mask values. > > > > Allowing the system call to be invoked directly simplifies the io_uring > > interface and avoids potential future incompatibilities. I'm not sure > > if there was other reasoning fort not doing so initially. > > > > One issue I cannot account for is the difference in "used" memory reported > > by free(1) after running the statx a large (10000) number of times. > > > > The difference is significant ~100k and doesn't really change after > > dropping caches. > > > > I enabled memory leak detection and couldn't see anything related to the test. > > > > Bijan Mottahedeh (4): > > io_uring: add io_statx structure > > statx: allow system call to be invoked from io_uring > > io_uring: call statx directly > > statx: hide interfaces no longer used by io_uring > > > > fs/internal.h | 4 ++-- > > fs/io_uring.c | 72 +++++++++++++++-------------------------------------------- > > fs/stat.c | 37 +++++++++++++++++------------- > > 3 files changed, 42 insertions(+), 71 deletions(-) > > Thanks, this looks better. For a bit of history, the initial attempt was > to do the statx without async offload if we could do so without blocking. > Without that, we may as well simplify it. I was thinking that there may be use cases for allowing IOSQE_FIXED_FILE + AT_EMPTY_PATH. This sounds like it would make such a thing more difficult. > -- > Jens Axboe