All of lore.kernel.org
 help / color / mirror / Atom feed
* io_uring's openat doesn't work with large (2G+) files
@ 2020-04-08 14:51 Dmitry Kadashev
  2020-04-08 15:19 ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-08 14:51 UTC (permalink / raw)
  To: io-uring

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

Hi,

io_uring's openat seems to produce FDs that are incompatible with
large files (>2GB). If a file (smaller than 2GB) is opened using
io_uring's openat then writes -- both using io_uring and just sync
pwrite() -- past that threshold fail with EFBIG. If such a file is
opened with sync openat, then both io_uring's writes and sync writes
succeed. And if the file is larger than 2GB then io_uring's openat
fails right away, while the sync one works.

Kernel versions: 5.6.0-rc2, 5.6.0.

A couple of reproducers are attached: one demonstrates a successful open
followed by failing writes, and the other a failing open (each in
comparison with the sync calls).

The output of the former one for example:

*** sync openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write succeeded

*** sync openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write succeeded

*** io_uring openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write failed: File too large

*** io_uring openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write failed: File too large

-- 
Dmitry

[-- Attachment #2: test-io_uring-write-large-offset.c --]
[-- Type: text/x-csrc, Size: 3061 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/* Queue depth passed to io_uring_queue_init() in main(). */
static const int RSIZE = 2;
/* Flags used for every open in this test, both openat() and the io_uring openat. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
/* Permission bits for files created by those opens: owner read/write only. */
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Print a printf-style message to stderr and abort() the process.
 *
 * The body is wrapped in do { } while (0) WITHOUT a trailing semicolon so
 * that `DIE(...);` expands to exactly one statement; with the semicolon
 * baked into the macro, `if (x) DIE(...); else ...` would not compile.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while(0)

/*
 * Write a small fixed test buffer to fd at the given offset and report the
 * outcome on stderr ("write succeeded" / "write failed: <reason>").
 *
 * ring   - io_uring instance; only used when sync is zero
 * sync   - non-zero: write with pwrite(); zero: submit the write via io_uring
 *          and wait for its completion
 * fd     - descriptor to write to
 * offset - absolute file offset for the write
 *
 * Setup failures on the io_uring path (no SQE, submit or wait error) are
 * reported on stderr and the function returns without a write result.
 */
void do_write(struct io_uring *ring, int sync, int fd, off_t offset)
{
	/* off_t has no printf length modifier of its own; widen it to match %lld. */
	fprintf(stderr, "%s write at offset %lld\n", sync ? "sync": "io_uring", (long long)offset);
	char buf[] = "some test write buf";
	int res;
	if (sync) {
		res = pwrite(fd, buf, sizeof(buf), offset);
		if (res < 0) {
			/* Normalise to the negative-errno convention used by CQE results. */
			res = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe;
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit write: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* Check the wait result first: cqe is only valid when the wait succeeded. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		res = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (res < 0) {
		fprintf(stderr, "write failed: %s\n", strerror(-res));
	}
	else {
		fprintf(stderr, "write succeeded\n");
	}
}

/*
 * Open (creating if needed) the file fn relative to dfd, then write to it at
 * offset 0 and at offset 2^32 via do_write(), reporting each step on stderr.
 *
 * ring       - io_uring instance used for the io_uring open and/or writes
 * sync_open  - non-zero: open with openat(); zero: open via io_uring openat
 * sync_write - forwarded to do_write() to select pwrite() vs io_uring write
 * dfd        - directory descriptor the file name is resolved against
 * fn         - file name relative to dfd
 *
 * Setup failures on the io_uring path (no SQE, submit or wait error) are
 * reported on stderr and the function returns without attempting any write.
 */
void test_open_write(struct io_uring *ring, int sync_open, int sync_write, int dfd, const char* fn)
{
	fprintf(stderr, "\n*** %s openat\n", sync_open ? "sync" : "io_uring");
	struct io_uring_sqe *sqe;
	int fd = -1;
	if (sync_open) {
		fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
		if (fd < 0) {
			/* Normalise to the negative-errno convention used by CQE results. */
			fd = -errno;
		}
	}
	else {
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/* Check the wait result first: cqe is only valid when the wait succeeded. */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		fd = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (fd < 0) {
		fprintf(stderr, "openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "openat succeeded\n");
		do_write(ring, sync_write, fd, 0);
		/* Second write lands at 4 GiB, past the 2 GiB limit being tested. */
		do_write(ring, sync_write, fd, 1ull << 32);
		close(fd);
	}
}

/*
 * Entry point: opens /tmp as the base directory, sets up an io_uring of
 * RSIZE entries and runs test_open_write() for every combination of
 * sync / io_uring open and sync / io_uring write, each on its own file.
 * Aborts via DIE() if /tmp cannot be opened or the ring cannot be set up.
 */
int main()
{
	/* One row per scenario: how to open, how to write, which file to use. */
	static const struct {
		int sync_open;
		int sync_write;
		const char *name;
	} scenarios[] = {
		{ 1, 1, "io_uring_openat_write_test1" },
		{ 1, 0, "io_uring_openat_write_test2" },
		{ 0, 1, "io_uring_openat_write_test3" },
		{ 0, 0, "io_uring_openat_write_test4" },
	};
	struct io_uring ring;

	int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dfd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	int rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	for (size_t i = 0; i < sizeof(scenarios) / sizeof(scenarios[0]); i++) {
		test_open_write(&ring, scenarios[i].sync_open,
				scenarios[i].sync_write, dfd, scenarios[i].name);
	}

	io_uring_queue_exit(&ring);
	close(dfd);
	return 0;
}

[-- Attachment #3: test-io_uring-openat-large-file.c --]
[-- Type: text/x-csrc, Size: 2255 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Print a printf-style message to stderr and abort() the process.
 *
 * The body is wrapped in do { } while (0) WITHOUT a trailing semicolon so
 * that `DIE(...);` expands to exactly one statement; with the semicolon
 * baked into the macro, `if (x) DIE(...); else ...` would not compile.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while(0)

/* Queue depth passed to io_uring_queue_init() in main(). */
static const int RSIZE = 2;
/* Flags used for every open in this test, both openat() and the io_uring openat. */
static const int OPEN_FLAGS = O_RDWR | O_CREAT;
/* Permission bits for files created by those opens: owner read/write only. */
static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR;

/*
 * Open fn relative to dfd with a plain openat() call and report on stderr
 * whether it worked. A successfully opened descriptor is closed again
 * immediately.
 */
void open_sync(int dfd, const char* fn)
{
	int handle = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);

	if (handle >= 0) {
		fprintf(stderr, "sync open succeeded\n");
		close(handle);
		return;
	}

	fprintf(stderr, "sync open failed: %s\n", strerror(errno));
}

/*
 * Open fn relative to dfd through an io_uring openat request, wait for its
 * completion and report the result on stderr. A successfully opened
 * descriptor is closed again immediately.
 *
 * ring - io_uring instance to submit the request on
 * dfd  - directory descriptor the file name is resolved against
 * fn   - file name relative to dfd
 *
 * Failure to get an SQE, to submit, or to wait for the completion is
 * reported on stderr and ends the function early.
 */
void open_io_uring(struct io_uring *ring, int dfd, const char* fn)
{
	struct io_uring_sqe *sqe;
	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "failed to get sqe\n");
		return;
	}
	io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
	int ret = io_uring_submit(ring);
	if (ret < 0) {
		fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
		return;
	}

	struct io_uring_cqe *cqe;
	ret = io_uring_wait_cqe(ring, &cqe);
	/* Check the wait result first: cqe is only valid when the wait succeeded. */
	if (ret < 0) {
		fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
		return;
	}
	int fd = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (fd < 0) {
		fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "io_uring openat succeeded\n");
		close(fd);
	}
}

/*
 * Create (or reuse) fn relative to dfd and write a few bytes at offset 2^32
 * so that the file ends up larger than 4 GiB in apparent size.
 *
 * Returns 0 on success. On failure an error is printed to stderr and a
 * negative value is returned: -1 if the open failed, otherwise the negative
 * pwrite() result.
 */
int prepare_file(int dfd, const char* fn)
{
	const char payload[] = "foo";
	int status = 0;

	int file = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
	if (file < 0) {
		fprintf(stderr, "prepare/open: %s\n", strerror(errno));
		return -1;
	}

	int written = pwrite(file, payload, sizeof(payload), 1ull << 32);
	if (written < 0) {
		/* Report before close() so errno still belongs to pwrite(). */
		fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno));
		status = written;
	}

	close(file);
	return status;
}

/*
 * Entry point: opens /tmp as the base directory, sets up an io_uring of
 * RSIZE entries, prepares a large test file and, if that worked, tries to
 * open it first with openat() and then through io_uring.
 * Aborts via DIE() if /tmp cannot be opened or the ring cannot be set up.
 */
int main()
{
	const char *path = "io_uring_openat_test";
	struct io_uring ring;

	int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dfd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	int rc = io_uring_queue_init(RSIZE, &ring, 0);
	if (rc < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-rc));
	}

	/* Only compare the two open paths when the large file could be set up. */
	if (prepare_file(dfd, path) == 0) {
		open_sync(dfd, path);
		open_io_uring(&ring, dfd, path);
	}

	io_uring_queue_exit(&ring);
	close(dfd);
	return 0;
}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 14:51 io_uring's openat doesn't work with large (2G+) files Dmitry Kadashev
@ 2020-04-08 15:19 ` Jens Axboe
  2020-04-08 15:30   ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-04-08 15:19 UTC (permalink / raw)
  To: Dmitry Kadashev, io-uring

On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> Hi,
> 
> io_uring's openat seems to produce FDs that are incompatible with
> large files (>2GB). If a file (smaller than 2GB) is opened using
> io_uring's openat then writes -- both using io_uring and just sync
> pwrite() -- past that threshold fail with EFBIG. If such a file is
> opened with sync openat, then both io_uring's writes and sync writes
> succeed. And if the file is larger than 2GB then io_uring's openat
> fails right away, while the sync one works.
> 
> Kernel versions: 5.6.0-rc2, 5.6.0.
> 
> A couple of reproducers attached, one demos successful open with
> failed writes afterwards, and another failing open (in comparison with
> sync  calls).
> 
> The output of the former one for example:
> 
> *** sync openat
> openat succeeded
> sync write at offset 0
> write succeeded
> sync write at offset 4294967296
> write succeeded
> 
> *** sync openat
> openat succeeded
> io_uring write at offset 0
> write succeeded
> io_uring write at offset 4294967296
> write succeeded
> 
> *** io_uring openat
> openat succeeded
> sync write at offset 0
> write succeeded
> sync write at offset 4294967296
> write failed: File too large
> 
> *** io_uring openat
> openat succeeded
> io_uring write at offset 0
> write succeeded
> io_uring write at offset 4294967296
> write failed: File too large

Can you try with this one? Seems like only openat2 gets it set,
not openat...


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 79bd22289d73..63eb7efe10f2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2957,6 +2957,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	req->open.how.mode = READ_ONCE(sqe->len);
 	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	req->open.how.flags = READ_ONCE(sqe->open_flags);
+	if (force_o_largefile())
+		req->open.how.flags |= O_LARGEFILE;
 
 	req->open.filename = getname(fname);
 	if (IS_ERR(req->open.filename)) {

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 15:19 ` Jens Axboe
@ 2020-04-08 15:30   ` Dmitry Kadashev
  2020-04-08 15:36     ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-08 15:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> > Hi,
> >
> > io_uring's openat seems to produce FDs that are incompatible with
> > large files (>2GB). If a file (smaller than 2GB) is opened using
> > io_uring's openat then writes -- both using io_uring and just sync
> > pwrite() -- past that threshold fail with EFBIG. If such a file is
> > opened with sync openat, then both io_uring's writes and sync writes
> > succeed. And if the file is larger than 2GB then io_uring's openat
> > fails right away, while the sync one works.
> >
> > Kernel versions: 5.6.0-rc2, 5.6.0.
> >
> > A couple of reproducers attached, one demos successful open with
> > failed writes afterwards, and another failing open (in comparison with
> > sync  calls).
> >
> > The output of the former one for example:
> >
> > *** sync openat
> > openat succeeded
> > sync write at offset 0
> > write succeeded
> > sync write at offset 4294967296
> > write succeeded
> >
> > *** sync openat
> > openat succeeded
> > io_uring write at offset 0
> > write succeeded
> > io_uring write at offset 4294967296
> > write succeeded
> >
> > *** io_uring openat
> > openat succeeded
> > sync write at offset 0
> > write succeeded
> > sync write at offset 4294967296
> > write failed: File too large
> >
> > *** io_uring openat
> > openat succeeded
> > io_uring write at offset 0
> > write succeeded
> > io_uring write at offset 4294967296
> > write failed: File too large
>
> Can you try with this one? Seems like only openat2 gets it set,
> not openat...

I've tried specifying O_LARGEFILE explicitly; that did not change the
behavior. Is this good enough? It is much faster for me to check this way
than rebuilding the kernel. But if necessary I can do that.

Also, forgot to mention, this is on x86_64, not sure if O_LARGEFILE is
necessary to do 2G+ files there?

>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 79bd22289d73..63eb7efe10f2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2957,6 +2957,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>         req->open.how.mode = READ_ONCE(sqe->len);
>         fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
>         req->open.how.flags = READ_ONCE(sqe->open_flags);
> +       if (force_o_largefile())
> +               req->open.how.flags |= O_LARGEFILE;
>
>         req->open.filename = getname(fname);
>         if (IS_ERR(req->open.filename)) {
>
> --
> Jens Axboe
>

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 15:30   ` Dmitry Kadashev
@ 2020-04-08 15:36     ` Jens Axboe
  2020-04-08 15:41       ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-04-08 15:36 UTC (permalink / raw)
  To: Dmitry Kadashev; +Cc: io-uring

On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
>>> Hi,
>>>
>>> io_uring's openat seems to produce FDs that are incompatible with
>>> large files (>2GB). If a file (smaller than 2GB) is opened using
>>> io_uring's openat then writes -- both using io_uring and just sync
>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
>>> opened with sync openat, then both io_uring's writes and sync writes
>>> succeed. And if the file is larger than 2GB then io_uring's openat
>>> fails right away, while the sync one works.
>>>
>>> Kernel versions: 5.6.0-rc2, 5.6.0.
>>>
>>> A couple of reproducers attached, one demos successful open with
>>> failed writes afterwards, and another failing open (in comparison with
>>> sync  calls).
>>>
>>> The output of the former one for example:
>>>
>>> *** sync openat
>>> openat succeeded
>>> sync write at offset 0
>>> write succeeded
>>> sync write at offset 4294967296
>>> write succeeded
>>>
>>> *** sync openat
>>> openat succeeded
>>> io_uring write at offset 0
>>> write succeeded
>>> io_uring write at offset 4294967296
>>> write succeeded
>>>
>>> *** io_uring openat
>>> openat succeeded
>>> sync write at offset 0
>>> write succeeded
>>> sync write at offset 4294967296
>>> write failed: File too large
>>>
>>> *** io_uring openat
>>> openat succeeded
>>> io_uring write at offset 0
>>> write succeeded
>>> io_uring write at offset 4294967296
>>> write failed: File too large
>>
>> Can you try with this one? Seems like only openat2 gets it set,
>> not openat...
> 
> I've tried specifying O_LARGEFILE explicitly, that did not change the
> behavior. Is this good enough? Much faster for me to check this way
> that rebuilding the kernel. But if necessary I can do that.

Not sure setting O_LARGEFILE is going to do it on x86-64; the patch
should fix it, though. It might have worked on 32-bit.

> Also, forgot to mention, this is on x86_64, not sure if O_LARGEFILE is
> necessary to do 2G+ files there?

Internally, yes.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 15:36     ` Jens Axboe
@ 2020-04-08 15:41       ` Dmitry Kadashev
  2020-04-08 15:49         ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-08 15:41 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
> > On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>
> >> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> >>> Hi,
> >>>
> >>> io_uring's openat seems to produce FDs that are incompatible with
> >>> large files (>2GB). If a file (smaller than 2GB) is opened using
> >>> io_uring's openat then writes -- both using io_uring and just sync
> >>> pwrite() -- past that threshold fail with EFBIG. If such a file is
> >>> opened with sync openat, then both io_uring's writes and sync writes
> >>> succeed. And if the file is larger than 2GB then io_uring's openat
> >>> fails right away, while the sync one works.
> >>>
> >>> Kernel versions: 5.6.0-rc2, 5.6.0.
> >>>
> >>> A couple of reproducers attached, one demos successful open with
> >>> failed writes afterwards, and another failing open (in comparison with
> >>> sync  calls).
> >>>
> >>> The output of the former one for example:
> >>>
> >>> *** sync openat
> >>> openat succeeded
> >>> sync write at offset 0
> >>> write succeeded
> >>> sync write at offset 4294967296
> >>> write succeeded
> >>>
> >>> *** sync openat
> >>> openat succeeded
> >>> io_uring write at offset 0
> >>> write succeeded
> >>> io_uring write at offset 4294967296
> >>> write succeeded
> >>>
> >>> *** io_uring openat
> >>> openat succeeded
> >>> sync write at offset 0
> >>> write succeeded
> >>> sync write at offset 4294967296
> >>> write failed: File too large
> >>>
> >>> *** io_uring openat
> >>> openat succeeded
> >>> io_uring write at offset 0
> >>> write succeeded
> >>> io_uring write at offset 4294967296
> >>> write failed: File too large
> >>
> >> Can you try with this one? Seems like only openat2 gets it set,
> >> not openat...
> >
> > I've tried specifying O_LARGEFILE explicitly, that did not change the
> > behavior. Is this good enough? Much faster for me to check this way
> > that rebuilding the kernel. But if necessary I can do that.
>
> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
> should fix it though. Might have worked on 32-bit, though.

OK, will test.

>
> > Also, forgot to mention, this is on x86_64, not sure if O_LARGEFILE is
> > necessary to do 2G+ files there?
>
> Internally, yes.
>
> --
> Jens Axboe
>

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 15:41       ` Dmitry Kadashev
@ 2020-04-08 15:49         ` Jens Axboe
  2020-04-08 16:12           ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-04-08 15:49 UTC (permalink / raw)
  To: Dmitry Kadashev; +Cc: io-uring

On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
> On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
>>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>
>>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
>>>>> Hi,
>>>>>
>>>>> io_uring's openat seems to produce FDs that are incompatible with
>>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
>>>>> io_uring's openat then writes -- both using io_uring and just sync
>>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
>>>>> opened with sync openat, then both io_uring's writes and sync writes
>>>>> succeed. And if the file is larger than 2GB then io_uring's openat
>>>>> fails right away, while the sync one works.
>>>>>
>>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
>>>>>
>>>>> A couple of reproducers attached, one demos successful open with
>>>>> failed writes afterwards, and another failing open (in comparison with
>>>>> sync  calls).
>>>>>
>>>>> The output of the former one for example:
>>>>>
>>>>> *** sync openat
>>>>> openat succeeded
>>>>> sync write at offset 0
>>>>> write succeeded
>>>>> sync write at offset 4294967296
>>>>> write succeeded
>>>>>
>>>>> *** sync openat
>>>>> openat succeeded
>>>>> io_uring write at offset 0
>>>>> write succeeded
>>>>> io_uring write at offset 4294967296
>>>>> write succeeded
>>>>>
>>>>> *** io_uring openat
>>>>> openat succeeded
>>>>> sync write at offset 0
>>>>> write succeeded
>>>>> sync write at offset 4294967296
>>>>> write failed: File too large
>>>>>
>>>>> *** io_uring openat
>>>>> openat succeeded
>>>>> io_uring write at offset 0
>>>>> write succeeded
>>>>> io_uring write at offset 4294967296
>>>>> write failed: File too large
>>>>
>>>> Can you try with this one? Seems like only openat2 gets it set,
>>>> not openat...
>>>
>>> I've tried specifying O_LARGEFILE explicitly, that did not change the
>>> behavior. Is this good enough? Much faster for me to check this way
>>> that rebuilding the kernel. But if necessary I can do that.
>>
>> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
>> should fix it though. Might have worked on 32-bit, though.
> 
> OK, will test.

Great, thanks. FWIW, tested here, and it works for me.

Any objection to adding your test cases to the liburing regression
suite?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 15:49         ` Jens Axboe
@ 2020-04-08 16:12           ` Dmitry Kadashev
  2020-04-08 16:26             ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-08 16:12 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Wed, Apr 8, 2020 at 10:49 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
> > On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>
> >> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
> >>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>
> >>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> >>>>> Hi,
> >>>>>
> >>>>> io_uring's openat seems to produce FDs that are incompatible with
> >>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
> >>>>> io_uring's openat then writes -- both using io_uring and just sync
> >>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
> >>>>> opened with sync openat, then both io_uring's writes and sync writes
> >>>>> succeed. And if the file is larger than 2GB then io_uring's openat
> >>>>> fails right away, while the sync one works.
> >>>>>
> >>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
> >>>>>
> >>>>> A couple of reproducers attached, one demos successful open with
> >>>>> failed writes afterwards, and another failing open (in comparison with
> >>>>> sync  calls).
> >>>>>
> >>>>> The output of the former one for example:
> >>>>>
> >>>>> *** sync openat
> >>>>> openat succeeded
> >>>>> sync write at offset 0
> >>>>> write succeeded
> >>>>> sync write at offset 4294967296
> >>>>> write succeeded
> >>>>>
> >>>>> *** sync openat
> >>>>> openat succeeded
> >>>>> io_uring write at offset 0
> >>>>> write succeeded
> >>>>> io_uring write at offset 4294967296
> >>>>> write succeeded
> >>>>>
> >>>>> *** io_uring openat
> >>>>> openat succeeded
> >>>>> sync write at offset 0
> >>>>> write succeeded
> >>>>> sync write at offset 4294967296
> >>>>> write failed: File too large
> >>>>>
> >>>>> *** io_uring openat
> >>>>> openat succeeded
> >>>>> io_uring write at offset 0
> >>>>> write succeeded
> >>>>> io_uring write at offset 4294967296
> >>>>> write failed: File too large
> >>>>
> >>>> Can you try with this one? Seems like only openat2 gets it set,
> >>>> not openat...
> >>>
> >>> I've tried specifying O_LARGEFILE explicitly, that did not change the
> >>> behavior. Is this good enough? Much faster for me to check this way
> >>> that rebuilding the kernel. But if necessary I can do that.
> >>
> >> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
> >> should fix it though. Might have worked on 32-bit, though.
> >
> > OK, will test.
>
> Great, thanks. FWIW, tested here, and it works for me.

Great, will post results tomorrow.

>
> Any objection to adding your test cases to the liburing regression
> suite?

Feel free to!

>
> --
> Jens Axboe
>

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 16:12           ` Dmitry Kadashev
@ 2020-04-08 16:26             ` Jens Axboe
  2020-04-09  3:50               ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-04-08 16:26 UTC (permalink / raw)
  To: Dmitry Kadashev; +Cc: io-uring

On 4/8/20 9:12 AM, Dmitry Kadashev wrote:
> On Wed, Apr 8, 2020 at 10:49 PM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
>>> On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>
>>>> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
>>>>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>>>
>>>>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> io_uring's openat seems to produce FDs that are incompatible with
>>>>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
>>>>>>> io_uring's openat then writes -- both using io_uring and just sync
>>>>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
>>>>>>> opened with sync openat, then both io_uring's writes and sync writes
>>>>>>> succeed. And if the file is larger than 2GB then io_uring's openat
>>>>>>> fails right away, while the sync one works.
>>>>>>>
>>>>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
>>>>>>>
>>>>>>> A couple of reproducers attached, one demos successful open with
>>>>>>> failed writes afterwards, and another failing open (in comparison with
>>>>>>> sync  calls).
>>>>>>>
>>>>>>> The output of the former one for example:
>>>>>>>
>>>>>>> *** sync openat
>>>>>>> openat succeeded
>>>>>>> sync write at offset 0
>>>>>>> write succeeded
>>>>>>> sync write at offset 4294967296
>>>>>>> write succeeded
>>>>>>>
>>>>>>> *** sync openat
>>>>>>> openat succeeded
>>>>>>> io_uring write at offset 0
>>>>>>> write succeeded
>>>>>>> io_uring write at offset 4294967296
>>>>>>> write succeeded
>>>>>>>
>>>>>>> *** io_uring openat
>>>>>>> openat succeeded
>>>>>>> sync write at offset 0
>>>>>>> write succeeded
>>>>>>> sync write at offset 4294967296
>>>>>>> write failed: File too large
>>>>>>>
>>>>>>> *** io_uring openat
>>>>>>> openat succeeded
>>>>>>> io_uring write at offset 0
>>>>>>> write succeeded
>>>>>>> io_uring write at offset 4294967296
>>>>>>> write failed: File too large
>>>>>>
>>>>>> Can you try with this one? Seems like only openat2 gets it set,
>>>>>> not openat...
>>>>>
>>>>> I've tried specifying O_LARGEFILE explicitly, that did not change the
>>>>> behavior. Is this good enough? Much faster for me to check this way
>>>>> that rebuilding the kernel. But if necessary I can do that.
>>>>
>>>> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
>>>> should fix it though. Might have worked on 32-bit, though.
>>>
>>> OK, will test.
>>
>> Great, thanks. FWIW, tested here, and it works for me.
> 
> Great, will post results tomorrow.

Thanks!

>> Any objection to adding your test cases to the liburing regression
>> suite?
> 
> Feel free to!

Great, done!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-08 16:26             ` Jens Axboe
@ 2020-04-09  3:50               ` Dmitry Kadashev
  2020-04-09 15:29                 ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-09  3:50 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Wed, Apr 8, 2020 at 11:26 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 4/8/20 9:12 AM, Dmitry Kadashev wrote:
> > On Wed, Apr 8, 2020 at 10:49 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>
> >> On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
> >>> On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>
> >>>> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
> >>>>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>>>
> >>>>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> >>>>>>> Hi,
> >>>>>>>
> >>>>>>> io_uring's openat seems to produce FDs that are incompatible with
> >>>>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
> >>>>>>> io_uring's openat then writes -- both using io_uring and just sync
> >>>>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
> >>>>>>> opened with sync openat, then both io_uring's writes and sync writes
> >>>>>>> succeed. And if the file is larger than 2GB then io_uring's openat
> >>>>>>> fails right away, while the sync one works.
> >>>>>>>
> >>>>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
> >>>>>>>
> >>>>>>> A couple of reproducers attached, one demos successful open with
> >>>>>>> failed writes afterwards, and another failing open (in comparison with
> >>>>>>> sync  calls).
> >>>>>>>
> >>>>>>> The output of the former one for example:
> >>>>>>>
> >>>>>>> *** sync openat
> >>>>>>> openat succeeded
> >>>>>>> sync write at offset 0
> >>>>>>> write succeeded
> >>>>>>> sync write at offset 4294967296
> >>>>>>> write succeeded
> >>>>>>>
> >>>>>>> *** sync openat
> >>>>>>> openat succeeded
> >>>>>>> io_uring write at offset 0
> >>>>>>> write succeeded
> >>>>>>> io_uring write at offset 4294967296
> >>>>>>> write succeeded
> >>>>>>>
> >>>>>>> *** io_uring openat
> >>>>>>> openat succeeded
> >>>>>>> sync write at offset 0
> >>>>>>> write succeeded
> >>>>>>> sync write at offset 4294967296
> >>>>>>> write failed: File too large
> >>>>>>>
> >>>>>>> *** io_uring openat
> >>>>>>> openat succeeded
> >>>>>>> io_uring write at offset 0
> >>>>>>> write succeeded
> >>>>>>> io_uring write at offset 4294967296
> >>>>>>> write failed: File too large
> >>>>>>
> >>>>>> Can you try with this one? Seems like only openat2 gets it set,
> >>>>>> not openat...
> >>>>>
> >>>>> I've tried specifying O_LARGEFILE explicitly, that did not change the
> >>>>> behavior. Is this good enough? Much faster for me to check this way
> >>>>> that rebuilding the kernel. But if necessary I can do that.
> >>>>
> >>>> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
> >>>> should fix it though. Might have worked on 32-bit, though.
> >>>
> >>> OK, will test.
> >>
> >> Great, thanks. FWIW, tested here, and it works for me.
> >
> > Great, will post results tomorrow.
>
> Thanks!

With the patch applied it works perfectly, thanks.

>
> >> Any objection to adding your test cases to the liburing regression
> >> suite?
> >
> > Feel free to!
>
> Great, done!
>

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-09  3:50               ` Dmitry Kadashev
@ 2020-04-09 15:29                 ` Jens Axboe
  2020-04-13  9:20                   ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-04-09 15:29 UTC (permalink / raw)
  To: Dmitry Kadashev; +Cc: io-uring

On 4/8/20 8:50 PM, Dmitry Kadashev wrote:
> On Wed, Apr 8, 2020 at 11:26 PM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> On 4/8/20 9:12 AM, Dmitry Kadashev wrote:
>>> On Wed, Apr 8, 2020 at 10:49 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>
>>>> On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
>>>>> On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>>>
>>>>>> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
>>>>>>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
>>>>>>>>
>>>>>>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> io_uring's openat seems to produce FDs that are incompatible with
>>>>>>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
>>>>>>>>> io_uring's openat then writes -- both using io_uring and just sync
>>>>>>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
>>>>>>>>> opened with sync openat, then both io_uring's writes and sync writes
>>>>>>>>> succeed. And if the file is larger than 2GB then io_uring's openat
>>>>>>>>> fails right away, while the sync one works.
>>>>>>>>>
>>>>>>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
>>>>>>>>>
>>>>>>>>> A couple of reproducers attached, one demos successful open with
>>>>>>>>> failed writes afterwards, and another failing open (in comparison with
>>>>>>>>> sync  calls).
>>>>>>>>>
>>>>>>>>> The output of the former one for example:
>>>>>>>>>
>>>>>>>>> *** sync openat
>>>>>>>>> openat succeeded
>>>>>>>>> sync write at offset 0
>>>>>>>>> write succeeded
>>>>>>>>> sync write at offset 4294967296
>>>>>>>>> write succeeded
>>>>>>>>>
>>>>>>>>> *** sync openat
>>>>>>>>> openat succeeded
>>>>>>>>> io_uring write at offset 0
>>>>>>>>> write succeeded
>>>>>>>>> io_uring write at offset 4294967296
>>>>>>>>> write succeeded
>>>>>>>>>
>>>>>>>>> *** io_uring openat
>>>>>>>>> openat succeeded
>>>>>>>>> sync write at offset 0
>>>>>>>>> write succeeded
>>>>>>>>> sync write at offset 4294967296
>>>>>>>>> write failed: File too large
>>>>>>>>>
>>>>>>>>> *** io_uring openat
>>>>>>>>> openat succeeded
>>>>>>>>> io_uring write at offset 0
>>>>>>>>> write succeeded
>>>>>>>>> io_uring write at offset 4294967296
>>>>>>>>> write failed: File too large
>>>>>>>>
>>>>>>>> Can you try with this one? Seems like only openat2 gets it set,
>>>>>>>> not openat...
>>>>>>>
>>>>>>> I've tried specifying O_LARGEFILE explicitly, that did not change the
>>>>>>> behavior. Is this good enough? Much faster for me to check this way
>>>>>>> than rebuilding the kernel. But if necessary I can do that.
>>>>>>
>>>>>> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
>>>>>> should fix it though. Might have worked on 32-bit, though.
>>>>>
>>>>> OK, will test.
>>>>
>>>> Great, thanks. FWIW, tested here, and it works for me.
>>>
>>> Great, will post results tomorrow.
>>
>> Thanks!
> 
> With the patch applied it works perfectly, thanks.

Thanks for testing!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-09 15:29                 ` Jens Axboe
@ 2020-04-13  9:20                   ` Dmitry Kadashev
  2020-04-13 10:09                     ` Pavel Begunkov
  0 siblings, 1 reply; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-13  9:20 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Thu, Apr 9, 2020 at 10:29 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 4/8/20 8:50 PM, Dmitry Kadashev wrote:
> > On Wed, Apr 8, 2020 at 11:26 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>
> >> On 4/8/20 9:12 AM, Dmitry Kadashev wrote:
> >>> On Wed, Apr 8, 2020 at 10:49 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>
> >>>> On 4/8/20 8:41 AM, Dmitry Kadashev wrote:
> >>>>> On Wed, Apr 8, 2020 at 10:36 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>>>
> >>>>>> On 4/8/20 8:30 AM, Dmitry Kadashev wrote:
> >>>>>>> On Wed, Apr 8, 2020 at 10:19 PM Jens Axboe <axboe@kernel.dk> wrote:
> >>>>>>>>
> >>>>>>>> On 4/8/20 7:51 AM, Dmitry Kadashev wrote:
> >>>>>>>>> Hi,
> >>>>>>>>>
> >>>>>>>>> io_uring's openat seems to produce FDs that are incompatible with
> >>>>>>>>> large files (>2GB). If a file (smaller than 2GB) is opened using
> >>>>>>>>> io_uring's openat then writes -- both using io_uring and just sync
> >>>>>>>>> pwrite() -- past that threshold fail with EFBIG. If such a file is
> >>>>>>>>> opened with sync openat, then both io_uring's writes and sync writes
> >>>>>>>>> succeed. And if the file is larger than 2GB then io_uring's openat
> >>>>>>>>> fails right away, while the sync one works.
> >>>>>>>>>
> >>>>>>>>> Kernel versions: 5.6.0-rc2, 5.6.0.
> >>>>>>>>>
> >>>>>>>>> A couple of reproducers attached, one demos successful open with
> >>>>>>>>> failed writes afterwards, and another failing open (in comparison with
> >>>>>>>>> sync  calls).
> >>>>>>>>>
> >>>>>>>>> The output of the former one for example:
> >>>>>>>>>
> >>>>>>>>> *** sync openat
> >>>>>>>>> openat succeeded
> >>>>>>>>> sync write at offset 0
> >>>>>>>>> write succeeded
> >>>>>>>>> sync write at offset 4294967296
> >>>>>>>>> write succeeded
> >>>>>>>>>
> >>>>>>>>> *** sync openat
> >>>>>>>>> openat succeeded
> >>>>>>>>> io_uring write at offset 0
> >>>>>>>>> write succeeded
> >>>>>>>>> io_uring write at offset 4294967296
> >>>>>>>>> write succeeded
> >>>>>>>>>
> >>>>>>>>> *** io_uring openat
> >>>>>>>>> openat succeeded
> >>>>>>>>> sync write at offset 0
> >>>>>>>>> write succeeded
> >>>>>>>>> sync write at offset 4294967296
> >>>>>>>>> write failed: File too large
> >>>>>>>>>
> >>>>>>>>> *** io_uring openat
> >>>>>>>>> openat succeeded
> >>>>>>>>> io_uring write at offset 0
> >>>>>>>>> write succeeded
> >>>>>>>>> io_uring write at offset 4294967296
> >>>>>>>>> write failed: File too large
> >>>>>>>>
> >>>>>>>> Can you try with this one? Seems like only openat2 gets it set,
> >>>>>>>> not openat...
> >>>>>>>
> >>>>>>> I've tried specifying O_LARGEFILE explicitly, that did not change the
> >>>>>>> behavior. Is this good enough? Much faster for me to check this way
> >>>>>>> than rebuilding the kernel. But if necessary I can do that.
> >>>>>>
> >>>>>> Not sure O_LARGEFILE settings is going to do it for x86-64, the patch
> >>>>>> should fix it though. Might have worked on 32-bit, though.
> >>>>>
> >>>>> OK, will test.
> >>>>
> >>>> Great, thanks. FWIW, tested here, and it works for me.
> >>>
> >>> Great, will post results tomorrow.
> >>
> >> Thanks!
> >
> > With the patch applied it works perfectly, thanks.
>
> Thanks for testing!

Can I ask if this is going to be merged into 5.6? Since it's a bug
(important enough from my perspective) in existing logic. Thanks.

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-13  9:20                   ` Dmitry Kadashev
@ 2020-04-13 10:09                     ` Pavel Begunkov
  2020-04-13 10:19                       ` Dmitry Kadashev
  0 siblings, 1 reply; 13+ messages in thread
From: Pavel Begunkov @ 2020-04-13 10:09 UTC (permalink / raw)
  To: Dmitry Kadashev, Jens Axboe; +Cc: io-uring

On 4/13/2020 12:20 PM, Dmitry Kadashev wrote:
> Can I ask if this is going to be merged into 5.6? Since it's a bug
> (important enough from my perspective) in existing logic. Thanks.

Yes, it's marked for 5.6

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: io_uring's openat doesn't work with large (2G+) files
  2020-04-13 10:09                     ` Pavel Begunkov
@ 2020-04-13 10:19                       ` Dmitry Kadashev
  0 siblings, 0 replies; 13+ messages in thread
From: Dmitry Kadashev @ 2020-04-13 10:19 UTC (permalink / raw)
  To: Pavel Begunkov; +Cc: Jens Axboe, io-uring

On Mon, Apr 13, 2020 at 5:09 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> On 4/13/2020 12:20 PM, Dmitry Kadashev wrote:
> > Can I ask if this is going to be merged into 5.6? Since it's a bug
> > (important enough from my perspective) in existing logic. Thanks.
>
> Yes, it's marked for 5.6

Perfect, thanks!

-- 
Dmitry

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-04-13 10:20 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-08 14:51 io_uring's openat doesn't work with large (2G+) files Dmitry Kadashev
2020-04-08 15:19 ` Jens Axboe
2020-04-08 15:30   ` Dmitry Kadashev
2020-04-08 15:36     ` Jens Axboe
2020-04-08 15:41       ` Dmitry Kadashev
2020-04-08 15:49         ` Jens Axboe
2020-04-08 16:12           ` Dmitry Kadashev
2020-04-08 16:26             ` Jens Axboe
2020-04-09  3:50               ` Dmitry Kadashev
2020-04-09 15:29                 ` Jens Axboe
2020-04-13  9:20                   ` Dmitry Kadashev
2020-04-13 10:09                     ` Pavel Begunkov
2020-04-13 10:19                       ` Dmitry Kadashev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.