All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Kadashev <dkadashev@gmail.com>
To: io-uring@vger.kernel.org
Subject: io_uring's openat doesn't work with large (2G+) files
Date: Wed, 8 Apr 2020 21:51:23 +0700	[thread overview]
Message-ID: <CAOKbgA4K4FzxTEoHHYcoOAe6oNwFvGbzcfch2sDmicJvf3Ydwg@mail.gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

Hi,

io_uring's openat seems to produce FDs that are incompatible with
large files (>2GB). If a file (smaller than 2GB) is opened using
io_uring's openat then writes -- both using io_uring and just sync
pwrite() -- past that threshold fail with EFBIG. If such a file is
opened with sync openat, then both io_uring's writes and sync writes
succeed. And if the file is larger than 2GB then io_uring's openat
fails right away, while the sync one works.

Kernel versions: 5.6.0-rc2, 5.6.0.

Two reproducers are attached: one demonstrates a successful open followed
by failed writes, and the other demonstrates a failing open (each in
comparison with the sync calls).

The output of the former one for example:

*** sync openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write succeeded

*** sync openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write succeeded

*** io_uring openat
openat succeeded
sync write at offset 0
write succeeded
sync write at offset 4294967296
write failed: File too large

*** io_uring openat
openat succeeded
io_uring write at offset 0
write succeeded
io_uring write at offset 4294967296
write failed: File too large

-- 
Dmitry

[-- Attachment #2: test-io_uring-write-large-offset.c --]
[-- Type: text/x-csrc, Size: 3061 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/* Permission bits for files created by the test: owner read and write. */
static const mode_t OPEN_MODE = S_IWUSR | S_IRUSR;
/* Flags for every open in the test: read/write, create if missing. */
static const int OPEN_FLAGS = O_CREAT | O_RDWR;
/* Number of entries requested from io_uring_queue_init(). */
static const int RSIZE = 2;

/*
 * Print a printf-style message to stderr and abort() the process.
 *
 * The expansion deliberately has no trailing semicolon: the caller's own
 * `;` completes the statement, so `if (x) DIE(...); else ...` parses.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/*
 * Write a small test buffer to fd at the given offset and report the
 * outcome on stderr.
 *
 * ring   - initialized io_uring, used only when sync is zero
 * sync   - non-zero: write with pwrite(); zero: submit the write through
 *          io_uring and wait for its completion
 * fd     - file descriptor to write to
 * offset - file offset at which to write
 */
void do_write(struct io_uring *ring, int sync, int fd, off_t offset)
{
	char buf[] = "some test write buf";
	int res;

	/* off_t has no printf length modifier of its own; widen it explicitly. */
	fprintf(stderr, "%s write at offset %lld\n", sync ? "sync": "io_uring", (long long)offset);
	if (sync) {
		res = pwrite(fd, buf, sizeof(buf), offset);
		if (res < 0) {
			res = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe;
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit write: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/*
		 * If the wait failed, cqe does not point at a completion, so
		 * check ret before reading cqe->res or marking it seen.
		 */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		res = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (res < 0) {
		fprintf(stderr, "write failed: %s\n", strerror(-res));
	}
	else {
		fprintf(stderr, "write succeeded\n");
	}
}

/*
 * Open fn relative to dfd, then write to it at offset 0 and at offset
 * 1 << 32 (4 GiB), reporting each step on stderr.
 *
 * ring       - initialized io_uring
 * sync_open  - non-zero: open with openat(); zero: open through io_uring
 * sync_write - forwarded to do_write() as its sync argument
 * dfd        - directory file descriptor that fn is resolved against
 * fn         - name of the file to open (created if missing)
 */
void test_open_write(struct io_uring *ring, int sync_open, int sync_write, int dfd, const char* fn)
{
	int fd = -1;

	fprintf(stderr, "\n*** %s openat\n", sync_open ? "sync" : "io_uring");
	if (sync_open) {
		fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
		if (fd < 0) {
			fd = -errno;
		}
	}
	else {
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			fprintf(stderr, "failed to get sqe\n");
			return;
		}
		io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
		int ret = io_uring_submit(ring);
		if (ret < 0) {
			fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
			return;
		}

		struct io_uring_cqe *cqe;
		ret = io_uring_wait_cqe(ring, &cqe);
		/*
		 * If the wait failed, cqe does not point at a completion, so
		 * check ret before reading cqe->res or marking it seen.
		 */
		if (ret < 0) {
			fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
			return;
		}
		/* For an openat request the completion result is the new fd or -errno. */
		fd = cqe->res;
		io_uring_cqe_seen(ring, cqe);
	}
	if (fd < 0) {
		fprintf(stderr, "openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "openat succeeded\n");
		do_write(ring, sync_write, fd, 0);
		do_write(ring, sync_write, fd, 1ull << 32);
		close(fd);
	}
}

/*
 * Run the four open/write combinations (sync or io_uring open, followed by
 * sync or io_uring writes) against files under /tmp.
 */
int main()
{
	/* One row per scenario: how to open, how to write, and the file name. */
	static const struct {
		int sync_open;
		int sync_write;
		const char *fn;
	} cases[] = {
		{ 1, 1, "io_uring_openat_write_test1" },
		{ 1, 0, "io_uring_openat_write_test2" },
		{ 0, 1, "io_uring_openat_write_test3" },
		{ 0, 0, "io_uring_openat_write_test4" },
	};
	struct io_uring ring;
	int dfd;
	int ret;

	dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dfd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	ret = io_uring_queue_init(RSIZE, &ring, 0);
	if (ret < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-ret));
	}

	for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		test_open_write(&ring, cases[i].sync_open, cases[i].sync_write, dfd, cases[i].fn);
	}

	io_uring_queue_exit(&ring);
	close(dfd);
	return 0;
}

[-- Attachment #3: test-io_uring-openat-large-file.c --]
[-- Type: text/x-csrc, Size: 2255 bytes --]

#include <liburing.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Print a printf-style message to stderr and abort() the process.
 *
 * The expansion deliberately has no trailing semicolon: the caller's own
 * `;` completes the statement, so `if (x) DIE(...); else ...` parses.
 */
#define DIE(...) do {\
		fprintf(stderr, __VA_ARGS__);\
		abort();\
	} while (0)

/* Permission bits for files created by the test: owner read and write. */
static const mode_t OPEN_MODE = S_IWUSR | S_IRUSR;
/* Flags for every open in the test: read/write, create if missing. */
static const int OPEN_FLAGS = O_CREAT | O_RDWR;
/* Number of entries requested from io_uring_queue_init(). */
static const int RSIZE = 2;

/*
 * Open fn relative to dfd with a direct openat() call, report the result on
 * stderr, and close the descriptor again if the open succeeded.
 */
void open_sync(int dfd, const char* fn)
{
	const int fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);

	if (fd >= 0) {
		fprintf(stderr, "sync open succeeded\n");
		close(fd);
		return;
	}
	fprintf(stderr, "sync open failed: %s\n", strerror(errno));
}

/*
 * Open fn relative to dfd by submitting an openat request through io_uring,
 * report the result on stderr, and close the descriptor again if the open
 * succeeded.
 *
 * ring - initialized io_uring
 * dfd  - directory file descriptor that fn is resolved against
 * fn   - name of the file to open
 */
void open_io_uring(struct io_uring *ring, int dfd, const char* fn)
{
	struct io_uring_sqe *sqe;
	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "failed to get sqe\n");
		return;
	}
	io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE);
	int ret = io_uring_submit(ring);
	if (ret < 0) {
		fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret));
		return;
	}

	struct io_uring_cqe *cqe;
	ret = io_uring_wait_cqe(ring, &cqe);
	/*
	 * If the wait failed, cqe does not point at a completion, so check ret
	 * before reading cqe->res or marking it seen.
	 */
	if (ret < 0) {
		fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret));
		return;
	}

	/* For an openat request the completion result is the new fd or -errno. */
	int fd = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (fd < 0) {
		fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd));
	}
	else {
		fprintf(stderr, "io_uring openat succeeded\n");
		close(fd);
	}
}

/*
 * Create (or reuse) fn relative to dfd and write a few bytes at offset
 * 1 << 32, so the file extends past 4 GiB.
 *
 * Returns 0 on success and a negative value if the open or the write
 * failed; the failure is also reported on stderr.
 */
int prepare_file(int dfd, const char* fn)
{
	const char payload[] = "foo";
	int fd;
	int written;

	fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE);
	if (fd < 0) {
		fprintf(stderr, "prepare/open: %s\n", strerror(errno));
		return -1;
	}

	written = pwrite(fd, payload, sizeof(payload), 1ull << 32);
	if (written < 0) {
		fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno));
		close(fd);
		return written;
	}

	close(fd);
	return 0;
}

/*
 * Prepare a file under /tmp that extends past 4 GiB, then try to open it
 * first with a direct openat() call and then through io_uring.
 */
int main()
{
	const char *fn = "io_uring_openat_test";
	struct io_uring ring;
	int dfd;
	int ret;

	dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
	if (dfd < 0) {
		DIE("open /tmp: %s\n", strerror(errno));
	}

	ret = io_uring_queue_init(RSIZE, &ring, 0);
	if (ret < 0) {
		DIE("failed to init io_uring: %s\n", strerror(-ret));
	}

	/* Only compare the two open paths if the large file could be set up. */
	if (prepare_file(dfd, fn) == 0) {
		open_sync(dfd, fn);
		open_io_uring(&ring, dfd, fn);
	}

	io_uring_queue_exit(&ring);
	close(dfd);
	return 0;
}

             reply	other threads:[~2020-04-08 14:51 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-08 14:51 Dmitry Kadashev [this message]
2020-04-08 15:19 ` io_uring's openat doesn't work with large (2G+) files Jens Axboe
2020-04-08 15:30   ` Dmitry Kadashev
2020-04-08 15:36     ` Jens Axboe
2020-04-08 15:41       ` Dmitry Kadashev
2020-04-08 15:49         ` Jens Axboe
2020-04-08 16:12           ` Dmitry Kadashev
2020-04-08 16:26             ` Jens Axboe
2020-04-09  3:50               ` Dmitry Kadashev
2020-04-09 15:29                 ` Jens Axboe
2020-04-13  9:20                   ` Dmitry Kadashev
2020-04-13 10:09                     ` Pavel Begunkov
2020-04-13 10:19                       ` Dmitry Kadashev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAOKbgA4K4FzxTEoHHYcoOAe6oNwFvGbzcfch2sDmicJvf3Ydwg@mail.gmail.com \
    --to=dkadashev@gmail.com \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.