io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix
@ 2020-08-23  6:30 Zorro Lang
  2020-08-23  6:30 ` [PATCH v3 1/4] fsstress: add IO_URING read and write operations Zorro Lang
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Zorro Lang @ 2020-08-23  6:30 UTC (permalink / raw)
  To: fstests; +Cc: axboe, io-uring

This patchset adds new IO_URING tests to fsstress [1/4] and fsx [4/4],
and makes some related changes and bug fixes along the way [2/4 and 3/4].

fsstress and fsx are important tools in xfstests for filesystem I/O testing,
and lots of test cases use them. Adding IO_URING operations to fsstress and
fsx will help to cover IO_URING testing from the fs side.

I'm not an IO_URING expert, so cc io-uring@ list, please feel free to
tell me if you find something wrong or have any suggestions to improve
the test.

V2 did below changes:
1) 1/4 change the definition of URING_ENTRIES to 1
2) 2/4 change the definition of AIO_ENTRIES to 1, undo an unrelated changed line
3) 4/4 turn to use io_uring_prep_readv/io_uring_prep_writev, due to old
       liburing(0.2-2) doesn't support io_uring_prep_read/io_uring_prep_write.

V3 changed io_uring_submit(&ring) to io_uring_submit_and_wait(&ring, 1). I'm
not sure if this is what Jens Axboe's review comment really meant, please check.
  https://marc.info/?l=fstests&m=159811932808057&w=2

Thanks,
Zorro




^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 1/4] fsstress: add IO_URING read and write operations
  2020-08-23  6:30 [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
@ 2020-08-23  6:30 ` Zorro Lang
  2020-09-03 12:42   ` Brian Foster
  2020-08-23  6:30 ` [PATCH v3 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Zorro Lang @ 2020-08-23  6:30 UTC (permalink / raw)
  To: fstests; +Cc: axboe, io-uring

IO_URING is a new feature of the current Linux kernel; add basic IO_URING
read/write operations to fsstress to cover this kind of IO testing.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 README                 |   4 +-
 configure.ac           |   1 +
 include/builddefs.in   |   1 +
 ltp/Makefile           |   5 ++
 ltp/fsstress.c         | 139 ++++++++++++++++++++++++++++++++++++++++-
 m4/Makefile            |   1 +
 m4/package_liburing.m4 |   4 ++
 7 files changed, 152 insertions(+), 3 deletions(-)
 create mode 100644 m4/package_liburing.m4

diff --git a/README b/README
index d0e23fcd..ae0f804d 100644
--- a/README
+++ b/README
@@ -8,13 +8,13 @@ _______________________
 	sudo apt-get install xfslibs-dev uuid-dev libtool-bin \
 	e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \
 	libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \
-	uuid-runtime python sqlite3
+	uuid-runtime python sqlite3 liburing-dev
   For Fedora, RHEL, or CentOS:
 	yum install acl attr automake bc dbench dump e2fsprogs fio \
 	gawk gcc indent libtool lvm2 make psmisc quota sed \
 	xfsdump xfsprogs \
 	libacl-devel libattr-devel libaio-devel libuuid-devel \
-	xfsprogs-devel btrfs-progs-devel python sqlite
+	xfsprogs-devel btrfs-progs-devel python sqlite liburing-devel
 	(Older distributions may require xfsprogs-qa-devel as well.)
 	(Note that for RHEL and CentOS, you may need the EPEL repo.)
 - run make
diff --git a/configure.ac b/configure.ac
index 4bb50b32..8922c47e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,6 +61,7 @@ AC_PACKAGE_NEED_ACLINIT_LIBACL
 
 AC_PACKAGE_WANT_GDBM
 AC_PACKAGE_WANT_AIO
+AC_PACKAGE_WANT_URING
 AC_PACKAGE_WANT_DMAPI
 AC_PACKAGE_WANT_LINUX_FIEMAP_H
 AC_PACKAGE_WANT_FALLOCATE
diff --git a/include/builddefs.in b/include/builddefs.in
index e7894b1a..fded3230 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -61,6 +61,7 @@ RPM_VERSION     = @rpm_version@
 ENABLE_SHARED = @enable_shared@
 HAVE_DB = @have_db@
 HAVE_AIO = @have_aio@
+HAVE_URING = @have_uring@
 HAVE_FALLOCATE = @have_fallocate@
 HAVE_OPEN_BY_HANDLE_AT = @have_open_by_handle_at@
 HAVE_DMAPI = @have_dmapi@
diff --git a/ltp/Makefile b/ltp/Makefile
index ebf40336..198d930f 100644
--- a/ltp/Makefile
+++ b/ltp/Makefile
@@ -24,6 +24,11 @@ LCFLAGS += -DAIO
 LLDLIBS += -laio -lpthread
 endif
 
+ifeq ($(HAVE_URING), true)
+LCFLAGS += -DURING
+LLDLIBS += -luring
+endif
+
 ifeq ($(HAVE_LIBBTRFSUTIL), true)
 LLDLIBS += -lbtrfsutil
 endif
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 709fdeec..7a0e278a 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -30,6 +30,11 @@
 #include <libaio.h>
 io_context_t	io_ctx;
 #endif
+#ifdef URING
+#include <liburing.h>
+#define URING_ENTRIES	1
+struct io_uring	ring;
+#endif
 #include <sys/syscall.h>
 #include <sys/xattr.h>
 
@@ -139,6 +144,8 @@ typedef enum {
 	OP_TRUNCATE,
 	OP_UNLINK,
 	OP_UNRESVSP,
+	OP_URING_READ,
+	OP_URING_WRITE,
 	OP_WRITE,
 	OP_WRITEV,
 	OP_LAST
@@ -267,6 +274,8 @@ void	sync_f(int, long);
 void	truncate_f(int, long);
 void	unlink_f(int, long);
 void	unresvsp_f(int, long);
+void	uring_read_f(int, long);
+void	uring_write_f(int, long);
 void	write_f(int, long);
 void	writev_f(int, long);
 char	*xattr_flag_to_string(int);
@@ -335,6 +344,8 @@ opdesc_t	ops[] = {
 	{ OP_TRUNCATE, "truncate", truncate_f, 2, 1 },
 	{ OP_UNLINK, "unlink", unlink_f, 1, 1 },
 	{ OP_UNRESVSP, "unresvsp", unresvsp_f, 1, 1 },
+	{ OP_URING_READ, "uring_read", uring_read_f, 1, 0 },
+	{ OP_URING_WRITE, "uring_write", uring_write_f, 1, 1 },
 	{ OP_WRITE, "write", write_f, 4, 1 },
 	{ OP_WRITEV, "writev", writev_f, 4, 1 },
 }, *ops_end;
@@ -692,6 +703,12 @@ int main(int argc, char **argv)
 				fprintf(stderr, "io_setup failed");
 				exit(1);
 			}
+#endif
+#ifdef URING
+			if (io_uring_queue_init(URING_ENTRIES, &ring, 0)) {
+				fprintf(stderr, "io_uring_queue_init failed\n");
+				exit(1);
+			}
 #endif
 			for (i = 0; !loops || (i < loops); i++)
 				doproc();
@@ -701,7 +718,9 @@ int main(int argc, char **argv)
 				return 1;
 			}
 #endif
-
+#ifdef URING
+			io_uring_queue_exit(&ring);
+#endif
 			cleanup_flist();
 			free(freq_table);
 			return 0;
@@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
 }
 #endif
 
+#ifdef URING
+void
+do_uring_rw(int opno, long r, int flags)
+{
+	char		*buf;
+	int		e;
+	pathname_t	f;
+	int		fd;
+	size_t		len;
+	int64_t		lr;
+	off64_t		off;
+	struct stat64	stb;
+	int		v;
+	char		st[1024];
+	struct io_uring_sqe	*sqe;
+	struct io_uring_cqe	*cqe;
+	struct iovec	iovec;
+	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
+
+	init_pathname(&f);
+	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
+		if (v)
+			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
+		goto uring_out3;
+	}
+	fd = open_path(&f, flags);
+	e = fd < 0 ? errno : 0;
+	check_cwd();
+	if (fd < 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - open %s failed %d\n",
+			       procid, opno, f.path, e);
+		goto uring_out3;
+	}
+	if (fstat64(fd, &stb) < 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
+			       procid, opno, f.path, errno);
+		goto uring_out2;
+	}
+	inode_info(st, sizeof(st), &stb, v);
+	if (!iswrite && stb.st_size == 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
+			       f.path, st);
+		goto uring_out2;
+	}
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		if (v)
+			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
+			       procid, opno);
+		goto uring_out2;
+	}
+	lr = ((int64_t)random() << 32) + random();
+	len = (random() % FILELEN_MAX) + 1;
+	buf = malloc(len);
+	if (!buf) {
+		if (v)
+			printf("%d/%d: do_uring_rw - malloc failed\n",
+			       procid, opno);
+		goto uring_out2;
+	}
+	iovec.iov_base = buf;
+	iovec.iov_len = len;
+	if (iswrite) {
+		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
+		off %= maxfsize;
+		memset(buf, nameseq & 0xff, len);
+		io_uring_prep_writev(sqe, fd, &iovec, 1, off);
+	} else {
+		off = (off64_t)(lr % stb.st_size);
+		io_uring_prep_readv(sqe, fd, &iovec, 1, off);
+	}
+
+	if ((e = io_uring_submit_and_wait(&ring, 1)) != 1) {
+		if (v)
+			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
+			       iswrite ? "uring_write" : "uring_read", e);
+		goto uring_out1;
+	}
+	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
+		if (v)
+			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
+			       iswrite ? "uring_write" : "uring_read", e);
+		goto uring_out1;
+	}
+	if (v)
+		printf("%d/%d: %s %s%s [%lld, %d(res=%d)] %d\n",
+		       procid, opno, iswrite ? "uring_write" : "uring_read",
+		       f.path, st, (long long)off, (int)len, cqe->res, e);
+	io_uring_cqe_seen(&ring, cqe);
+
+ uring_out1:
+	free(buf);
+ uring_out2:
+	close(fd);
+ uring_out3:
+	free_pathname(&f);
+}
+#endif
+
 void
 aread_f(int opno, long r)
 {
@@ -5044,6 +5165,22 @@ unresvsp_f(int opno, long r)
 	close(fd);
 }
 
+void
+uring_read_f(int opno, long r)
+{
+#ifdef URING
+	do_uring_rw(opno, r, O_RDONLY);
+#endif
+}
+
+void
+uring_write_f(int opno, long r)
+{
+#ifdef URING
+	do_uring_rw(opno, r, O_WRONLY);
+#endif
+}
+
 void
 write_f(int opno, long r)
 {
diff --git a/m4/Makefile b/m4/Makefile
index 7fbff822..0352534d 100644
--- a/m4/Makefile
+++ b/m4/Makefile
@@ -14,6 +14,7 @@ LSRCFILES = \
 	package_dmapidev.m4 \
 	package_globals.m4 \
 	package_libcdev.m4 \
+	package_liburing.m4 \
 	package_ncurses.m4 \
 	package_pthread.m4 \
 	package_ssldev.m4 \
diff --git a/m4/package_liburing.m4 b/m4/package_liburing.m4
new file mode 100644
index 00000000..c92cc02a
--- /dev/null
+++ b/m4/package_liburing.m4
@@ -0,0 +1,4 @@
+AC_DEFUN([AC_PACKAGE_WANT_URING],
+  [ AC_CHECK_HEADERS(liburing.h, [ have_uring=true ], [ have_uring=false ])
+    AC_SUBST(have_uring)
+  ])
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/4] fsstress: reduce the number of events when io_setup
  2020-08-23  6:30 [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
  2020-08-23  6:30 ` [PATCH v3 1/4] fsstress: add IO_URING read and write operations Zorro Lang
@ 2020-08-23  6:30 ` Zorro Lang
  2020-09-03 12:42   ` Brian Foster
  2020-08-23  6:30 ` [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
  2020-08-23  6:30 ` [PATCH v3 4/4] fsx: add IO_URING test Zorro Lang
  3 siblings, 1 reply; 12+ messages in thread
From: Zorro Lang @ 2020-08-23  6:30 UTC (permalink / raw)
  To: fstests; +Cc: axboe, io-uring

The original number (128) of aio events for io_setup is too big. Running
lots of fsstress processes (e.g. -p 1000) always hits an io_setup EAGAIN
error, because nr_events exceeds the limit of available events. Each
fsstress process only does one libaio read/write operation at a time, so
reduce the number of aio events to 1 to allow more fsstress processes to
do AIO testing.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 ltp/fsstress.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 7a0e278a..ef2017a8 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -28,6 +28,7 @@
 #endif
 #ifdef AIO
 #include <libaio.h>
+#define AIO_ENTRIES	1
 io_context_t	io_ctx;
 #endif
 #ifdef URING
@@ -699,8 +700,8 @@ int main(int argc, char **argv)
 			}
 			procid = i;
 #ifdef AIO
-			if (io_setup(128, &io_ctx) != 0) {
-				fprintf(stderr, "io_setup failed");
+			if (io_setup(AIO_ENTRIES, &io_ctx) != 0) {
+				fprintf(stderr, "io_setup failed\n");
 				exit(1);
 			}
 #endif
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw
  2020-08-23  6:30 [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
  2020-08-23  6:30 ` [PATCH v3 1/4] fsstress: add IO_URING read and write operations Zorro Lang
  2020-08-23  6:30 ` [PATCH v3 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
@ 2020-08-23  6:30 ` Zorro Lang
  2020-09-03 12:43   ` Brian Foster
  2020-08-23  6:30 ` [PATCH v3 4/4] fsx: add IO_URING test Zorro Lang
  3 siblings, 1 reply; 12+ messages in thread
From: Zorro Lang @ 2020-08-23  6:30 UTC (permalink / raw)
  To: fstests; +Cc: axboe, io-uring

If io_submit or io_getevents fails, do_aio_rw() doesn't free "buf",
causing a memory leak.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 ltp/fsstress.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index ef2017a8..17b024b5 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -2099,8 +2099,7 @@ do_aio_rw(int opno, long r, int flags)
 	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
 		if (v)
 			printf("%d/%d: do_aio_rw - no filename\n", procid, opno);
-		free_pathname(&f);
-		return;
+		goto aio_out3;
 	}
 	fd = open_path(&f, flags|O_DIRECT);
 	e = fd < 0 ? errno : 0;
@@ -2109,16 +2108,13 @@ do_aio_rw(int opno, long r, int flags)
 		if (v)
 			printf("%d/%d: do_aio_rw - open %s failed %d\n",
 			       procid, opno, f.path, e);
-		free_pathname(&f);
-		return;
+		goto aio_out3;
 	}
 	if (fstat64(fd, &stb) < 0) {
 		if (v)
 			printf("%d/%d: do_aio_rw - fstat64 %s failed %d\n",
 			       procid, opno, f.path, errno);
-		free_pathname(&f);
-		close(fd);
-		return;
+		goto aio_out2;
 	}
 	inode_info(st, sizeof(st), &stb, v);
 	if (!iswrite && stb.st_size == 0) {
@@ -2150,6 +2146,12 @@ do_aio_rw(int opno, long r, int flags)
 	else if (len > diob.d_maxiosz)
 		len = diob.d_maxiosz;
 	buf = memalign(diob.d_mem, len);
+	if (!buf) {
+		if (v)
+			printf("%d/%d: do_aio_rw - memalign failed\n",
+			       procid, opno);
+		goto aio_out2;
+	}
 
 	if (iswrite) {
 		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
@@ -2166,27 +2168,26 @@ do_aio_rw(int opno, long r, int flags)
 		if (v)
 			printf("%d/%d: %s - io_submit failed %d\n",
 			       procid, opno, iswrite ? "awrite" : "aread", e);
-		free_pathname(&f);
-		close(fd);
-		return;
+		goto aio_out1;
 	}
 	if ((e = io_getevents(io_ctx, 1, 1, &event, NULL)) != 1) {
 		if (v)
 			printf("%d/%d: %s - io_getevents failed %d\n",
 			       procid, opno, iswrite ? "awrite" : "aread", e);
-		free_pathname(&f);
-		close(fd);
-		return;
+		goto aio_out1;
 	}
 
 	e = event.res != len ? event.res2 : 0;
-	free(buf);
 	if (v)
 		printf("%d/%d: %s %s%s [%lld,%d] %d\n",
 		       procid, opno, iswrite ? "awrite" : "aread",
 		       f.path, st, (long long)off, (int)len, e);
-	free_pathname(&f);
+ aio_out1:
+	free(buf);
+ aio_out2:
 	close(fd);
+ aio_out3:
+	free_pathname(&f);
 }
 #endif
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 4/4] fsx: add IO_URING test
  2020-08-23  6:30 [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
                   ` (2 preceding siblings ...)
  2020-08-23  6:30 ` [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
@ 2020-08-23  6:30 ` Zorro Lang
  2020-09-03 12:44   ` Brian Foster
  3 siblings, 1 reply; 12+ messages in thread
From: Zorro Lang @ 2020-08-23  6:30 UTC (permalink / raw)
  To: fstests; +Cc: axboe, io-uring

Add a new IO_URING test to fsx; use the -U option to enable it.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 ltp/fsx.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 144 insertions(+), 14 deletions(-)

diff --git a/ltp/fsx.c b/ltp/fsx.c
index 7c76655a..05663528 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -34,6 +34,9 @@
 #ifdef AIO
 #include <libaio.h>
 #endif
+#ifdef URING
+#include <liburing.h>
+#endif
 #include <sys/syscall.h>
 
 #ifndef MAP_FILE
@@ -176,21 +179,17 @@ int	integrity = 0;			/* -i flag */
 int	fsxgoodfd = 0;
 int	o_direct;			/* -Z */
 int	aio = 0;
+int	uring = 0;
 int	mark_nr = 0;
 
 int page_size;
 int page_mask;
 int mmap_mask;
-#ifdef AIO
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
 #define READ 0
 #define WRITE 1
-#define fsxread(a,b,c,d)	aio_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d)	aio_rw(WRITE, a,b,c,d)
-#else
-#define fsxread(a,b,c,d)	read(a,b,c)
-#define fsxwrite(a,b,c,d)	write(a,b,c)
-#endif
+#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
+#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
 
 const char *replayops = NULL;
 const char *recordops = NULL;
@@ -2242,7 +2241,7 @@ void
 usage(void)
 {
 	fprintf(stdout, "usage: %s",
-		"fsx [-dknqxABEFJLOWZ] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
+		"fsx [-dknqxBEFJLOWZ][-A|-U] [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] fname\n\
 	-b opnum: beginning operation number (default 1)\n\
 	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
 	-d: debug output for all operations\n\
@@ -2265,7 +2264,10 @@ usage(void)
 	-y synchronize changes to a file\n"
 
 #ifdef AIO
-"	-A: Use the AIO system calls\n"
+"	-A: Use the AIO system calls, -A excludes -U\n"
+#endif
+#ifdef URING
+"	-U: Use the IO_URING system calls, -U excludes -A\n"
 #endif
 "	-D startingop: debug output starting at specified operation\n"
 #ifdef HAVE_LINUX_FALLOC_H
@@ -2425,13 +2427,131 @@ out_error:
 	errno = -ret;
 	return -1;
 }
+#endif
+
+#ifdef URING
+struct io_uring ring;
+#define URING_ENTRIES	1024
+int
+uring_setup()
+{
+	int ret;
+
+	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
+	if (ret != 0) {
+		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
+                        strerror(ret));
+                return -1;
+        }
+        return 0;
+}
 
-int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+int
+__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 {
+	struct io_uring_sqe	*sqe;
+	struct io_uring_cqe	*cqe;
+	struct iovec		iovec;
 	int ret;
+	int res, res2 = 0;
+	char *p = buf;
+	unsigned l = len;
+	unsigned o = offset;
+
+
+	/*
+	 * Because io_uring tries non-blocking I/O (especially for reads),
+	 * 'normal' short reads are common. To avoid failing on such a short
+	 * read, loop the read/write (especially read) until all data is done.
+	 */
+ uring_loop:
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
+		        strerror(errno));
+		return -1;
+        }
+
+	iovec.iov_base = p;
+	iovec.iov_len = l;
+	if (rw == READ) {
+		io_uring_prep_readv(sqe, fd, &iovec, 1, o);
+	} else {
+		io_uring_prep_writev(sqe, fd, &iovec, 1, o);
+	}
+
+	ret = io_uring_submit_and_wait(&ring, 1);
+	if (ret != 1) {
+		fprintf(stderr, "errcode=%d\n", -ret);
+		fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
+		        rw == READ ? "read":"write", strerror(-ret));
+		goto uring_error;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret < 0) {
+		if (ret == 0)
+			fprintf(stderr, "uring %s: no events available\n",
+			        rw == READ ? "read":"write");
+		else {
+			fprintf(stderr, "errcode=%d\n", -ret);
+			fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
+			        rw == READ ? "read":"write", strerror(-ret));
+		}
+		goto uring_error;
+	}
+	res = cqe->res;
+	io_uring_cqe_seen(&ring, cqe);
+
+	res2 += res;
+	if (len != res2) {
+		if (res > 0) {
+			o += res;
+			l -= res;
+			p += res;
+			if (l > 0)
+				goto uring_loop;
+		} else if (res < 0) {
+			ret = res;
+			fprintf(stderr, "errcode=%d\n", -ret);
+			fprintf(stderr, "uring %s: io_uring failed: %s\n",
+			        rw == READ ? "read":"write", strerror(-ret));
+			goto uring_error;
+		} else {
+			fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
+			        rw == READ ? "read":"write", res2, len);
+		}
+	}
+	return res2;
+
+ uring_error:
+	/*
+	 * The caller expects error return in traditional libc
+	 * convention, i.e. -1 and the errno set to error.
+	 */
+	errno = -ret;
+	return -1;
+}
+#endif
+
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+{
+	int ret = -1;
 
 	if (aio) {
+#ifdef AIO
 		ret = __aio_rw(rw, fd, buf, len, offset);
+#elif
+		fprintf(stderr, "io_rw: need AIO support!\n");
+		exit(111);
+#endif
+	} else if (uring) {
+#ifdef URING
+		ret = __uring_rw(rw, fd, buf, len, offset);
+#elif
+		fprintf(stderr, "io_rw: need IO_URING support!\n");
+		exit(111);
+#endif
 	} else {
 		if (rw == READ)
 			ret = read(fd, buf, len);
@@ -2441,8 +2561,6 @@ int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 	return ret;
 }
 
-#endif
-
 #define test_fallocate(mode) __test_fallocate(mode, #mode)
 
 int
@@ -2496,7 +2614,7 @@ main(int argc, char **argv)
 	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
 
 	while ((ch = getopt_long(argc, argv,
-				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
+				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
 				 longopts, NULL)) != EOF)
 		switch (ch) {
 		case 'b':
@@ -2604,6 +2722,9 @@ main(int argc, char **argv)
 		case 'A':
 		        aio = 1;
 			break;
+		case 'U':
+		        uring = 1;
+			break;
 		case 'D':
 			debugstart = getnum(optarg, &endp);
 			if (debugstart < 1)
@@ -2694,6 +2815,11 @@ main(int argc, char **argv)
 	if (argc != 1)
 		usage();
 
+	if (aio && uring) {
+		fprintf(stderr, "-A and -U shouldn't be used together\n");
+		usage();
+	}
+
 	if (integrity && !dirpath) {
 		fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
 		usage();
@@ -2784,6 +2910,10 @@ main(int argc, char **argv)
 	if (aio) 
 		aio_setup();
 #endif
+#ifdef URING
+	if (uring)
+		uring_setup();
+#endif
 
 	if (!(o_flags & O_TRUNC)) {
 		off_t ret;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/4] fsstress: add IO_URING read and write operations
  2020-08-23  6:30 ` [PATCH v3 1/4] fsstress: add IO_URING read and write operations Zorro Lang
@ 2020-09-03 12:42   ` Brian Foster
  2020-09-03 14:07     ` Jens Axboe
  0 siblings, 1 reply; 12+ messages in thread
From: Brian Foster @ 2020-09-03 12:42 UTC (permalink / raw)
  To: Zorro Lang; +Cc: fstests, axboe, io-uring

On Sun, Aug 23, 2020 at 02:30:29PM +0800, Zorro Lang wrote:
> IO_URING is a new feature of curent linux kernel, add basic IO_URING
> read/write into fsstess to cover this kind of IO testing.
> 
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---
>  README                 |   4 +-
>  configure.ac           |   1 +
>  include/builddefs.in   |   1 +
>  ltp/Makefile           |   5 ++
>  ltp/fsstress.c         | 139 ++++++++++++++++++++++++++++++++++++++++-
>  m4/Makefile            |   1 +
>  m4/package_liburing.m4 |   4 ++
>  7 files changed, 152 insertions(+), 3 deletions(-)
>  create mode 100644 m4/package_liburing.m4
> 
...
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index 709fdeec..7a0e278a 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
...
> @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
>  }
>  #endif
>  
> +#ifdef URING
> +void
> +do_uring_rw(int opno, long r, int flags)
> +{
> +	char		*buf;
> +	int		e;
> +	pathname_t	f;
> +	int		fd;
> +	size_t		len;
> +	int64_t		lr;
> +	off64_t		off;
> +	struct stat64	stb;
> +	int		v;
> +	char		st[1024];
> +	struct io_uring_sqe	*sqe;
> +	struct io_uring_cqe	*cqe;
> +	struct iovec	iovec;
> +	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
> +
> +	init_pathname(&f);
> +	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
> +		goto uring_out3;
> +	}
> +	fd = open_path(&f, flags);
> +	e = fd < 0 ? errno : 0;
> +	check_cwd();
> +	if (fd < 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - open %s failed %d\n",
> +			       procid, opno, f.path, e);
> +		goto uring_out3;
> +	}
> +	if (fstat64(fd, &stb) < 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
> +			       procid, opno, f.path, errno);
> +		goto uring_out2;
> +	}
> +	inode_info(st, sizeof(st), &stb, v);
> +	if (!iswrite && stb.st_size == 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
> +			       f.path, st);
> +		goto uring_out2;
> +	}
> +	sqe = io_uring_get_sqe(&ring);
> +	if (!sqe) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
> +			       procid, opno);
> +		goto uring_out2;
> +	}

I'm not familiar with the io_uring bits, but do we have to do anything
to clean up this sqe object (or the cqe) before we return?

> +	lr = ((int64_t)random() << 32) + random();
> +	len = (random() % FILELEN_MAX) + 1;
> +	buf = malloc(len);
> +	if (!buf) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - malloc failed\n",
> +			       procid, opno);
> +		goto uring_out2;
> +	}
> +	iovec.iov_base = buf;
> +	iovec.iov_len = len;
> +	if (iswrite) {
> +		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> +		off %= maxfsize;
> +		memset(buf, nameseq & 0xff, len);
> +		io_uring_prep_writev(sqe, fd, &iovec, 1, off);
> +	} else {
> +		off = (off64_t)(lr % stb.st_size);
> +		io_uring_prep_readv(sqe, fd, &iovec, 1, off);
> +	}
> +
> +	if ((e = io_uring_submit_and_wait(&ring, 1)) != 1) {
> +		if (v)
> +			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
> +			       iswrite ? "uring_write" : "uring_read", e);
> +		goto uring_out1;
> +	}
> +	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
> +		if (v)
> +			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
> +			       iswrite ? "uring_write" : "uring_read", e);
> +		goto uring_out1;
> +	}
> +	if (v)
> +		printf("%d/%d: %s %s%s [%lld, %d(res=%d)] %d\n",
> +		       procid, opno, iswrite ? "uring_write" : "uring_read",
> +		       f.path, st, (long long)off, (int)len, cqe->res, e);
> +	io_uring_cqe_seen(&ring, cqe);
> +
> + uring_out1:
> +	free(buf);
> + uring_out2:
> +	close(fd);
> + uring_out3:
> +	free_pathname(&f);

It looks like the free_pathname() call is unconditional on exit. Could
we just initialize the other two variables properly and have something
like:

{
	...
out:
	if (buf)
		free(buf);
	if (fd != -1)
		close(fd);
	free_pathname(&f);
}

... and then we don't have to worry about using three different exit
labels in the right places?

Brian

> +}
> +#endif
> +
>  void
>  aread_f(int opno, long r)
>  {
> @@ -5044,6 +5165,22 @@ unresvsp_f(int opno, long r)
>  	close(fd);
>  }
>  
> +void
> +uring_read_f(int opno, long r)
> +{
> +#ifdef URING
> +	do_uring_rw(opno, r, O_RDONLY);
> +#endif
> +}
> +
> +void
> +uring_write_f(int opno, long r)
> +{
> +#ifdef URING
> +	do_uring_rw(opno, r, O_WRONLY);
> +#endif
> +}
> +
>  void
>  write_f(int opno, long r)
>  {
> diff --git a/m4/Makefile b/m4/Makefile
> index 7fbff822..0352534d 100644
> --- a/m4/Makefile
> +++ b/m4/Makefile
> @@ -14,6 +14,7 @@ LSRCFILES = \
>  	package_dmapidev.m4 \
>  	package_globals.m4 \
>  	package_libcdev.m4 \
> +	package_liburing.m4 \
>  	package_ncurses.m4 \
>  	package_pthread.m4 \
>  	package_ssldev.m4 \
> diff --git a/m4/package_liburing.m4 b/m4/package_liburing.m4
> new file mode 100644
> index 00000000..c92cc02a
> --- /dev/null
> +++ b/m4/package_liburing.m4
> @@ -0,0 +1,4 @@
> +AC_DEFUN([AC_PACKAGE_WANT_URING],
> +  [ AC_CHECK_HEADERS(liburing.h, [ have_uring=true ], [ have_uring=false ])
> +    AC_SUBST(have_uring)
> +  ])
> -- 
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 2/4] fsstress: reduce the number of events when io_setup
  2020-08-23  6:30 ` [PATCH v3 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
@ 2020-09-03 12:42   ` Brian Foster
  0 siblings, 0 replies; 12+ messages in thread
From: Brian Foster @ 2020-09-03 12:42 UTC (permalink / raw)
  To: Zorro Lang; +Cc: fstests, axboe, io-uring

On Sun, Aug 23, 2020 at 02:30:30PM +0800, Zorro Lang wrote:
> The original number(128) of aio events for io_setup too big. When try
> to run lots of fsstress processes(e.g. -p 1000) always hit io_setup
> EAGAIN error, due to the nr_events exceeds the limit of available
> events. Due to each fsstress process only does once libaio read/write
> operation each time. So reduce the aio events number to 1, to make more
> fsstress processes can do AIO test.
> 
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  ltp/fsstress.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index 7a0e278a..ef2017a8 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
> @@ -28,6 +28,7 @@
>  #endif
>  #ifdef AIO
>  #include <libaio.h>
> +#define AIO_ENTRIES	1
>  io_context_t	io_ctx;
>  #endif
>  #ifdef URING
> @@ -699,8 +700,8 @@ int main(int argc, char **argv)
>  			}
>  			procid = i;
>  #ifdef AIO
> -			if (io_setup(128, &io_ctx) != 0) {
> -				fprintf(stderr, "io_setup failed");
> +			if (io_setup(AIO_ENTRIES, &io_ctx) != 0) {
> +				fprintf(stderr, "io_setup failed\n");
>  				exit(1);
>  			}
>  #endif
> -- 
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw
  2020-08-23  6:30 ` [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
@ 2020-09-03 12:43   ` Brian Foster
  0 siblings, 0 replies; 12+ messages in thread
From: Brian Foster @ 2020-09-03 12:43 UTC (permalink / raw)
  To: Zorro Lang; +Cc: fstests, axboe, io-uring

On Sun, Aug 23, 2020 at 02:30:31PM +0800, Zorro Lang wrote:
> If io_submit or io_getevents fails, the do_aio_rw() won't free the
> "buf" and cause memory leak.
> 
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---

Can we use the same approach here as suggested in patch 1 to reduce the
number of labels? Otherwise looks Ok to me..

Brian

>  ltp/fsstress.c | 31 ++++++++++++++++---------------
>  1 file changed, 16 insertions(+), 15 deletions(-)
> 
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index ef2017a8..17b024b5 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
> @@ -2099,8 +2099,7 @@ do_aio_rw(int opno, long r, int flags)
>  	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
>  		if (v)
>  			printf("%d/%d: do_aio_rw - no filename\n", procid, opno);
> -		free_pathname(&f);
> -		return;
> +		goto aio_out3;
>  	}
>  	fd = open_path(&f, flags|O_DIRECT);
>  	e = fd < 0 ? errno : 0;
> @@ -2109,16 +2108,13 @@ do_aio_rw(int opno, long r, int flags)
>  		if (v)
>  			printf("%d/%d: do_aio_rw - open %s failed %d\n",
>  			       procid, opno, f.path, e);
> -		free_pathname(&f);
> -		return;
> +		goto aio_out3;
>  	}
>  	if (fstat64(fd, &stb) < 0) {
>  		if (v)
>  			printf("%d/%d: do_aio_rw - fstat64 %s failed %d\n",
>  			       procid, opno, f.path, errno);
> -		free_pathname(&f);
> -		close(fd);
> -		return;
> +		goto aio_out2;
>  	}
>  	inode_info(st, sizeof(st), &stb, v);
>  	if (!iswrite && stb.st_size == 0) {
> @@ -2150,6 +2146,12 @@ do_aio_rw(int opno, long r, int flags)
>  	else if (len > diob.d_maxiosz)
>  		len = diob.d_maxiosz;
>  	buf = memalign(diob.d_mem, len);
> +	if (!buf) {
> +		if (v)
> +			printf("%d/%d: do_aio_rw - memalign failed\n",
> +			       procid, opno);
> +		goto aio_out2;
> +	}
>  
>  	if (iswrite) {
>  		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> @@ -2166,27 +2168,26 @@ do_aio_rw(int opno, long r, int flags)
>  		if (v)
>  			printf("%d/%d: %s - io_submit failed %d\n",
>  			       procid, opno, iswrite ? "awrite" : "aread", e);
> -		free_pathname(&f);
> -		close(fd);
> -		return;
> +		goto aio_out1;
>  	}
>  	if ((e = io_getevents(io_ctx, 1, 1, &event, NULL)) != 1) {
>  		if (v)
>  			printf("%d/%d: %s - io_getevents failed %d\n",
>  			       procid, opno, iswrite ? "awrite" : "aread", e);
> -		free_pathname(&f);
> -		close(fd);
> -		return;
> +		goto aio_out1;
>  	}
>  
>  	e = event.res != len ? event.res2 : 0;
> -	free(buf);
>  	if (v)
>  		printf("%d/%d: %s %s%s [%lld,%d] %d\n",
>  		       procid, opno, iswrite ? "awrite" : "aread",
>  		       f.path, st, (long long)off, (int)len, e);
> -	free_pathname(&f);
> + aio_out1:
> +	free(buf);
> + aio_out2:
>  	close(fd);
> + aio_out3:
> +	free_pathname(&f);
>  }
>  #endif
>  
> -- 
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 4/4] fsx: add IO_URING test
  2020-08-23  6:30 ` [PATCH v3 4/4] fsx: add IO_URING test Zorro Lang
@ 2020-09-03 12:44   ` Brian Foster
  2020-09-06 15:55     ` Zorro Lang
  0 siblings, 1 reply; 12+ messages in thread
From: Brian Foster @ 2020-09-03 12:44 UTC (permalink / raw)
  To: Zorro Lang; +Cc: fstests, axboe, io-uring

On Sun, Aug 23, 2020 at 02:30:32PM +0800, Zorro Lang wrote:
> New IO_URING test for fsx, use -U option to enable IO_URING test.
> 
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---

Note that this one doesn't compile if one of the ifdefs doesn't evaluate
true:

fsx.c:2551:6: error: #elif with no expression
 2551 | #elif
      |      ^
    [CC]    fsx
fsx.c: In function 'fsx_rw':
fsx.c:2551:6: error: #elif with no expression
 2551 | #elif
      |      ^
gmake[2]: *** [Makefile:52: fsx] Error 1
gmake[1]: *** [include/buildrules:30: ltp] Error 2
make: *** [Makefile:53: default] Error 2

I suspect you want to replace both of those with #else. Otherwise mostly
some aesthetic comments...

>  ltp/fsx.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 144 insertions(+), 14 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 7c76655a..05663528 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
...
> @@ -176,21 +179,17 @@ int	integrity = 0;			/* -i flag */
>  int	fsxgoodfd = 0;
>  int	o_direct;			/* -Z */
>  int	aio = 0;
> +int	uring = 0;
>  int	mark_nr = 0;
>  
>  int page_size;
>  int page_mask;
>  int mmap_mask;
> -#ifdef AIO
> -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
>  #define READ 0
>  #define WRITE 1
> -#define fsxread(a,b,c,d)	aio_rw(READ, a,b,c,d)
> -#define fsxwrite(a,b,c,d)	aio_rw(WRITE, a,b,c,d)
> -#else
> -#define fsxread(a,b,c,d)	read(a,b,c)
> -#define fsxwrite(a,b,c,d)	write(a,b,c)
> -#endif
> +#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> +#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
>  

Could we do the refactoring that introduces fsx_rw and shuffles around
some of the existing AIO in an initial refactoring patch?

>  const char *replayops = NULL;
>  const char *recordops = NULL;
...
> @@ -2425,13 +2427,131 @@ out_error:
>  	errno = -ret;
>  	return -1;
>  }
> +#endif
> +
> +#ifdef URING

A whitespace line here...

> +struct io_uring ring;
> +#define URING_ENTRIES	1024

... and here would help readability.

> +int
> +uring_setup()
> +{
> +	int ret;
> +
> +	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
> +	if (ret != 0) {
> +		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
> +                        strerror(ret));
> +                return -1;
> +        }
> +        return 0;

Looks like some whitespace damage here.

Also, the fsstress patch has a io_uring_queue_exit() call but I don't
see one in this patch. Is that not needed?

> +}
>  
> -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +int
> +__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)

Do we still need the __ in the function names here and for __aio_rw()?

>  {
> +	struct io_uring_sqe	*sqe;
> +	struct io_uring_cqe	*cqe;
> +	struct iovec		iovec;
>  	int ret;
> +	int res, res2 = 0;
> +	char *p = buf;
> +	unsigned l = len;
> +	unsigned o = offset;
> +
> +
> +	/*
> +	 * Due to io_uring tries non-blocking IOs (especially read), that
> +	 * always cause 'normal' short reading. To avoid this short read
> +	 * fail, try to loop read/write (especially read) data.
> +	 */
> + uring_loop:
> +	sqe = io_uring_get_sqe(&ring);
> +	if (!sqe) {
> +		fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
> +		        strerror(errno));
> +		return -1;
> +        }
> +
> +	iovec.iov_base = p;
> +	iovec.iov_len = l;
> +	if (rw == READ) {
> +		io_uring_prep_readv(sqe, fd, &iovec, 1, o);
> +	} else {
> +		io_uring_prep_writev(sqe, fd, &iovec, 1, o);
> +	}
> +
> +	ret = io_uring_submit_and_wait(&ring, 1);
> +	if (ret != 1) {
> +		fprintf(stderr, "errcode=%d\n", -ret);
> +		fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
> +		        rw == READ ? "read":"write", strerror(-ret));
> +		goto uring_error;
> +	}
> +
> +	ret = io_uring_wait_cqe(&ring, &cqe);
> +	if (ret < 0) {
> +		if (ret == 0)

That doesn't look right since we only get here if ret < 0.

> +			fprintf(stderr, "uring %s: no events available\n",
> +			        rw == READ ? "read":"write");
> +		else {
> +			fprintf(stderr, "errcode=%d\n", -ret);
> +			fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
> +			        rw == READ ? "read":"write", strerror(-ret));
> +		}
> +		goto uring_error;
> +	}
> +	res = cqe->res;
> +	io_uring_cqe_seen(&ring, cqe);
> +
> +	res2 += res;
> +	if (len != res2) {
> +		if (res > 0) {
> +			o += res;
> +			l -= res;
> +			p += res;
> +			if (l > 0)
> +				goto uring_loop;
> +		} else if (res < 0) {
> +			ret = res;
> +			fprintf(stderr, "errcode=%d\n", -ret);
> +			fprintf(stderr, "uring %s: io_uring failed: %s\n",
> +			        rw == READ ? "read":"write", strerror(-ret));
> +			goto uring_error;

Can we elevate the error checks into the top level rather than nesting
logic like this? It's a little confusing to read and it looks
particularly odd since we've already done res2 += res before we get
here.

Also I'm wondering if this whole function would read a little better as
a do {} while() loop rather than using a label and goto.

> +		} else {
> +			fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
> +			        rw == READ ? "read":"write", res2, len);
> +		}
> +	}
> +	return res2;
> +
> + uring_error:
> +	/*
> +	 * The caller expects error return in traditional libc
> +	 * convention, i.e. -1 and the errno set to error.
> +	 */
> +	errno = -ret;
> +	return -1;
> +}
> +#endif
> +
> +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> +{
> +	int ret = -1;
>  
>  	if (aio) {
> +#ifdef AIO
>  		ret = __aio_rw(rw, fd, buf, len, offset);
> +#elif
> +		fprintf(stderr, "io_rw: need AIO support!\n");
> +		exit(111);
> +#endif
> +	} else if (uring) {
> +#ifdef URING
> +		ret = __uring_rw(rw, fd, buf, len, offset);
> +#elif
> +		fprintf(stderr, "io_rw: need IO_URING support!\n");
> +		exit(111);
> +#endif

I think the ifdefs would be cleaner if used to define stubbed out
variants of the associated functions. E.g.:

#ifdef URING
int
__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
	<do uring I/O>
}
#else
int
__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
{
	fprintf(stderr, "io_rw: need IO_URING support!\n");
	exit(111);
}
#endif

Brian

>  	} else {
>  		if (rw == READ)
>  			ret = read(fd, buf, len);
> @@ -2441,8 +2561,6 @@ int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
>  	return ret;
>  }
>  
> -#endif
> -
>  #define test_fallocate(mode) __test_fallocate(mode, #mode)
>  
>  int
> @@ -2496,7 +2614,7 @@ main(int argc, char **argv)
>  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
>  
>  	while ((ch = getopt_long(argc, argv,
> -				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
> +				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
>  				 longopts, NULL)) != EOF)
>  		switch (ch) {
>  		case 'b':
> @@ -2604,6 +2722,9 @@ main(int argc, char **argv)
>  		case 'A':
>  		        aio = 1;
>  			break;
> +		case 'U':
> +		        uring = 1;
> +			break;
>  		case 'D':
>  			debugstart = getnum(optarg, &endp);
>  			if (debugstart < 1)
> @@ -2694,6 +2815,11 @@ main(int argc, char **argv)
>  	if (argc != 1)
>  		usage();
>  
> +	if (aio && uring) {
> +		fprintf(stderr, "-A and -U shouldn't be used together\n");
> +		usage();
> +	}
> +
>  	if (integrity && !dirpath) {
>  		fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
>  		usage();
> @@ -2784,6 +2910,10 @@ main(int argc, char **argv)
>  	if (aio) 
>  		aio_setup();
>  #endif
> +#ifdef URING
> +	if (uring)
> +		uring_setup();
> +#endif
>  
>  	if (!(o_flags & O_TRUNC)) {
>  		off_t ret;
> -- 
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 1/4] fsstress: add IO_URING read and write operations
  2020-09-03 12:42   ` Brian Foster
@ 2020-09-03 14:07     ` Jens Axboe
  0 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2020-09-03 14:07 UTC (permalink / raw)
  To: Brian Foster, Zorro Lang; +Cc: fstests, io-uring

On 9/3/20 6:42 AM, Brian Foster wrote:
> On Sun, Aug 23, 2020 at 02:30:29PM +0800, Zorro Lang wrote:
>> IO_URING is a new feature of curent linux kernel, add basic IO_URING
>> read/write into fsstess to cover this kind of IO testing.
>>
>> Signed-off-by: Zorro Lang <zlang@redhat.com>
>> ---
>>  README                 |   4 +-
>>  configure.ac           |   1 +
>>  include/builddefs.in   |   1 +
>>  ltp/Makefile           |   5 ++
>>  ltp/fsstress.c         | 139 ++++++++++++++++++++++++++++++++++++++++-
>>  m4/Makefile            |   1 +
>>  m4/package_liburing.m4 |   4 ++
>>  7 files changed, 152 insertions(+), 3 deletions(-)
>>  create mode 100644 m4/package_liburing.m4
>>
> ...
>> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
>> index 709fdeec..7a0e278a 100644
>> --- a/ltp/fsstress.c
>> +++ b/ltp/fsstress.c
> ...
>> @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
>>  }
>>  #endif
>>  
>> +#ifdef URING
>> +void
>> +do_uring_rw(int opno, long r, int flags)
>> +{
>> +	char		*buf;
>> +	int		e;
>> +	pathname_t	f;
>> +	int		fd;
>> +	size_t		len;
>> +	int64_t		lr;
>> +	off64_t		off;
>> +	struct stat64	stb;
>> +	int		v;
>> +	char		st[1024];
>> +	struct io_uring_sqe	*sqe;
>> +	struct io_uring_cqe	*cqe;
>> +	struct iovec	iovec;
>> +	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
>> +
>> +	init_pathname(&f);
>> +	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
>> +		if (v)
>> +			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
>> +		goto uring_out3;
>> +	}
>> +	fd = open_path(&f, flags);
>> +	e = fd < 0 ? errno : 0;
>> +	check_cwd();
>> +	if (fd < 0) {
>> +		if (v)
>> +			printf("%d/%d: do_uring_rw - open %s failed %d\n",
>> +			       procid, opno, f.path, e);
>> +		goto uring_out3;
>> +	}
>> +	if (fstat64(fd, &stb) < 0) {
>> +		if (v)
>> +			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
>> +			       procid, opno, f.path, errno);
>> +		goto uring_out2;
>> +	}
>> +	inode_info(st, sizeof(st), &stb, v);
>> +	if (!iswrite && stb.st_size == 0) {
>> +		if (v)
>> +			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
>> +			       f.path, st);
>> +		goto uring_out2;
>> +	}
>> +	sqe = io_uring_get_sqe(&ring);
>> +	if (!sqe) {
>> +		if (v)
>> +			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
>> +			       procid, opno);
>> +		goto uring_out2;
>> +	}
> 
> I'm not familiar with the io_uring bits, but do we have to do anything
> to clean up this sqe object (or the cqe) before we return?

The cqe/sqe resources are tied to the ring, so as long as
io_uring_queue_exit() is called, then they are freed. And it is after
the run, so looks fine to me.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 4/4] fsx: add IO_URING test
  2020-09-03 12:44   ` Brian Foster
@ 2020-09-06 15:55     ` Zorro Lang
  2020-09-06 16:27       ` Zorro Lang
  0 siblings, 1 reply; 12+ messages in thread
From: Zorro Lang @ 2020-09-06 15:55 UTC (permalink / raw)
  To: Brian Foster; +Cc: fstests, axboe, io-uring

On Thu, Sep 03, 2020 at 08:44:13AM -0400, Brian Foster wrote:
> On Sun, Aug 23, 2020 at 02:30:32PM +0800, Zorro Lang wrote:
> > New IO_URING test for fsx, use -U option to enable IO_URING test.
> > 
> > Signed-off-by: Zorro Lang <zlang@redhat.com>
> > ---
> 
> Note that this one doesn't compile if one of the ifdefs doesn't evaluate
> true:
> 
> fsx.c:2551:6: error: #elif with no expression
>  2551 | #elif
>       |      ^
>     [CC]    fsx
> fsx.c: In function 'fsx_rw':
> fsx.c:2551:6: error: #elif with no expression
>  2551 | #elif
>       |      ^
> gmake[2]: *** [Makefile:52: fsx] Error 1
> gmake[1]: *** [include/buildrules:30: ltp] Error 2
> make: *** [Makefile:53: default] Error 2
> 
> I suspect you want to replace both of those with #else. Otherwise mostly
> some aesthetic comments...

Sorry, that's truly a mistake, I'll fix it :)

> 
> >  ltp/fsx.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 144 insertions(+), 14 deletions(-)
> > 
> > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > index 7c76655a..05663528 100644
> > --- a/ltp/fsx.c
> > +++ b/ltp/fsx.c
> ...
> > @@ -176,21 +179,17 @@ int	integrity = 0;			/* -i flag */
> >  int	fsxgoodfd = 0;
> >  int	o_direct;			/* -Z */
> >  int	aio = 0;
> > +int	uring = 0;
> >  int	mark_nr = 0;
> >  
> >  int page_size;
> >  int page_mask;
> >  int mmap_mask;
> > -#ifdef AIO
> > -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> > +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> >  #define READ 0
> >  #define WRITE 1
> > -#define fsxread(a,b,c,d)	aio_rw(READ, a,b,c,d)
> > -#define fsxwrite(a,b,c,d)	aio_rw(WRITE, a,b,c,d)
> > -#else
> > -#define fsxread(a,b,c,d)	read(a,b,c)
> > -#define fsxwrite(a,b,c,d)	write(a,b,c)
> > -#endif
> > +#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> > +#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
> >  
> 
> Could we do the refactoring that introduces fsx_rw and shuffles around
> some of the existing AIO in an initial refactoring patch?

May I skip this pre-patch, if you don't insist on it? :-P

> 
> >  const char *replayops = NULL;
> >  const char *recordops = NULL;
> ...
> > @@ -2425,13 +2427,131 @@ out_error:
> >  	errno = -ret;
> >  	return -1;
> >  }
> > +#endif
> > +
> > +#ifdef URING
> 
> A whitespace line here...
> 
> > +struct io_uring ring;
> > +#define URING_ENTRIES	1024
> 
> ... and here would help readability.
> 
> > +int
> > +uring_setup()
> > +{
> > +	int ret;
> > +
> > +	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
> > +	if (ret != 0) {
> > +		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
> > +                        strerror(ret));
> > +                return -1;
> > +        }
> > +        return 0;
> 
> Looks like some whitespace damage here.
> 
> Also, the fsstress patch has a io_uring_queue_exit() call but I don't
> see one in this patch. Is that not needed?

There's no aio_destroy() either. I think that's because fsstress is a multi-process
test, so it wants to destroy the io_uring or aio context when each process ends. But
fsx is a purely single-process test, so the io_uring or aio context will be destroyed
when fsx exits. I can add io_uring_queue_exit() and aio_destroy() if you think that
would be better.

> 
> > +}
> >  
> > -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > +int
> > +__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> 
> Do we still need the __ in the function names here and for __aio_rw()?

I don't think it's needed. I used the "__" only because the old __aio_rw() has it. I
can remove the "__" from both __aio_rw and __uring_rw.

> 
> >  {
> > +	struct io_uring_sqe	*sqe;
> > +	struct io_uring_cqe	*cqe;
> > +	struct iovec		iovec;
> >  	int ret;
> > +	int res, res2 = 0;
> > +	char *p = buf;
> > +	unsigned l = len;
> > +	unsigned o = offset;
> > +
> > +
> > +	/*
> > +	 * Due to io_uring tries non-blocking IOs (especially read), that
> > +	 * always cause 'normal' short reading. To avoid this short read
> > +	 * fail, try to loop read/write (especially read) data.
> > +	 */
> > + uring_loop:
> > +	sqe = io_uring_get_sqe(&ring);
> > +	if (!sqe) {
> > +		fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
> > +		        strerror(errno));
> > +		return -1;
> > +        }
> > +
> > +	iovec.iov_base = p;
> > +	iovec.iov_len = l;
> > +	if (rw == READ) {
> > +		io_uring_prep_readv(sqe, fd, &iovec, 1, o);
> > +	} else {
> > +		io_uring_prep_writev(sqe, fd, &iovec, 1, o);
> > +	}
> > +
> > +	ret = io_uring_submit_and_wait(&ring, 1);
> > +	if (ret != 1) {
> > +		fprintf(stderr, "errcode=%d\n", -ret);
> > +		fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
> > +		        rw == READ ? "read":"write", strerror(-ret));
> > +		goto uring_error;
> > +	}
> > +
> > +	ret = io_uring_wait_cqe(&ring, &cqe);
> > +	if (ret < 0) {
> > +		if (ret == 0)
> 
> That doesn't look right since we only get here if ret < 0.

Thanks, it should be (ret <= 0)

> 
> > +			fprintf(stderr, "uring %s: no events available\n",
> > +			        rw == READ ? "read":"write");
> > +		else {
> > +			fprintf(stderr, "errcode=%d\n", -ret);
> > +			fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
> > +			        rw == READ ? "read":"write", strerror(-ret));
> > +		}
> > +		goto uring_error;
> > +	}
> > +	res = cqe->res;
> > +	io_uring_cqe_seen(&ring, cqe);
> > +
> > +	res2 += res;
> > +	if (len != res2) {
> > +		if (res > 0) {
> > +			o += res;
> > +			l -= res;
> > +			p += res;
> > +			if (l > 0)
> > +				goto uring_loop;
> > +		} else if (res < 0) {
> > +			ret = res;
> > +			fprintf(stderr, "errcode=%d\n", -ret);
> > +			fprintf(stderr, "uring %s: io_uring failed: %s\n",
> > +			        rw == READ ? "read":"write", strerror(-ret));
> > +			goto uring_error;
> 
> Can we elevate the error checks into the top level rather than nesting
> logic like this? It's a little confusing to read and it looks
> particularly odd since we've already done res2 += res before we get
> here.
> 
> Also I'm wondering if this whole function would read a little better as
> a do {} while() loop rather than using a label and goto.

Sure, I'll try to change that.

> 
> > +		} else {
> > +			fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
> > +			        rw == READ ? "read":"write", res2, len);
> > +		}
> > +	}
> > +	return res2;
> > +
> > + uring_error:
> > +	/*
> > +	 * The caller expects error return in traditional libc
> > +	 * convention, i.e. -1 and the errno set to error.
> > +	 */
> > +	errno = -ret;
> > +	return -1;
> > +}
> > +#endif
> > +
> > +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > +{
> > +	int ret = -1;
> >  
> >  	if (aio) {
> > +#ifdef AIO
> >  		ret = __aio_rw(rw, fd, buf, len, offset);
> > +#elif
> > +		fprintf(stderr, "io_rw: need AIO support!\n");
> > +		exit(111);
> > +#endif
> > +	} else if (uring) {
> > +#ifdef URING
> > +		ret = __uring_rw(rw, fd, buf, len, offset);
> > +#elif
> > +		fprintf(stderr, "io_rw: need IO_URING support!\n");
> > +		exit(111);
> > +#endif
> 
> I think the ifdefs would be cleaner if used to define stubbed out
> variants of the associated functions. E.g.:
> 
> #ifdef URING
> int
> __uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> {
> 	<do uring I/O>
> }
> #else
> int
> __uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> {
> 	fprintf(stderr, "io_rw: need IO_URING support!\n");
> 	exit(111);
> }
> #endif

Sure, will do that.

Thanks for your review, Brian!
Zorro

> 
> Brian
> 
> >  	} else {
> >  		if (rw == READ)
> >  			ret = read(fd, buf, len);
> > @@ -2441,8 +2561,6 @@ int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> >  	return ret;
> >  }
> >  
> > -#endif
> > -
> >  #define test_fallocate(mode) __test_fallocate(mode, #mode)
> >  
> >  int
> > @@ -2496,7 +2614,7 @@ main(int argc, char **argv)
> >  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
> >  
> >  	while ((ch = getopt_long(argc, argv,
> > -				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
> > +				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> >  				 longopts, NULL)) != EOF)
> >  		switch (ch) {
> >  		case 'b':
> > @@ -2604,6 +2722,9 @@ main(int argc, char **argv)
> >  		case 'A':
> >  		        aio = 1;
> >  			break;
> > +		case 'U':
> > +		        uring = 1;
> > +			break;
> >  		case 'D':
> >  			debugstart = getnum(optarg, &endp);
> >  			if (debugstart < 1)
> > @@ -2694,6 +2815,11 @@ main(int argc, char **argv)
> >  	if (argc != 1)
> >  		usage();
> >  
> > +	if (aio && uring) {
> > +		fprintf(stderr, "-A and -U shouldn't be used together\n");
> > +		usage();
> > +	}
> > +
> >  	if (integrity && !dirpath) {
> >  		fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
> >  		usage();
> > @@ -2784,6 +2910,10 @@ main(int argc, char **argv)
> >  	if (aio) 
> >  		aio_setup();
> >  #endif
> > +#ifdef URING
> > +	if (uring)
> > +		uring_setup();
> > +#endif
> >  
> >  	if (!(o_flags & O_TRUNC)) {
> >  		off_t ret;
> > -- 
> > 2.20.1
> > 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 4/4] fsx: add IO_URING test
  2020-09-06 15:55     ` Zorro Lang
@ 2020-09-06 16:27       ` Zorro Lang
  0 siblings, 0 replies; 12+ messages in thread
From: Zorro Lang @ 2020-09-06 16:27 UTC (permalink / raw)
  To: Brian Foster, fstests, axboe, io-uring

On Sun, Sep 06, 2020 at 11:55:16PM +0800, Zorro Lang wrote:
> On Thu, Sep 03, 2020 at 08:44:13AM -0400, Brian Foster wrote:
> > On Sun, Aug 23, 2020 at 02:30:32PM +0800, Zorro Lang wrote:
> > > New IO_URING test for fsx, use -U option to enable IO_URING test.
> > > 
> > > Signed-off-by: Zorro Lang <zlang@redhat.com>
> > > ---
> > 
> > Note that this one doesn't compile if one of the ifdefs doesn't evaluate
> > true:
> > 
> > fsx.c:2551:6: error: #elif with no expression
> >  2551 | #elif
> >       |      ^
> >     [CC]    fsx
> > fsx.c: In function 'fsx_rw':
> > fsx.c:2551:6: error: #elif with no expression
> >  2551 | #elif
> >       |      ^
> > gmake[2]: *** [Makefile:52: fsx] Error 1
> > gmake[1]: *** [include/buildrules:30: ltp] Error 2
> > make: *** [Makefile:53: default] Error 2
> > 
> > I suspect you want to replace both of those with #else. Otherwise mostly
> > some aesthetic comments...
> 
> Sorry, that's truly a mistake, I'll fix it :)
> 
> > 
> > >  ltp/fsx.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 144 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > index 7c76655a..05663528 100644
> > > --- a/ltp/fsx.c
> > > +++ b/ltp/fsx.c
> > ...
> > > @@ -176,21 +179,17 @@ int	integrity = 0;			/* -i flag */
> > >  int	fsxgoodfd = 0;
> > >  int	o_direct;			/* -Z */
> > >  int	aio = 0;
> > > +int	uring = 0;
> > >  int	mark_nr = 0;
> > >  
> > >  int page_size;
> > >  int page_mask;
> > >  int mmap_mask;
> > > -#ifdef AIO
> > > -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> > > +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
> > >  #define READ 0
> > >  #define WRITE 1
> > > -#define fsxread(a,b,c,d)	aio_rw(READ, a,b,c,d)
> > > -#define fsxwrite(a,b,c,d)	aio_rw(WRITE, a,b,c,d)
> > > -#else
> > > -#define fsxread(a,b,c,d)	read(a,b,c)
> > > -#define fsxwrite(a,b,c,d)	write(a,b,c)
> > > -#endif
> > > +#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
> > > +#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
> > >  
> > 
> > Could we do the refactoring that introduces fsx_rw and shuffles around
> > some of the existing AIO in an initial refactoring patch?
> 
> May I skip this pre-patch, if you don't insist on it? :-P
> 
> > 
> > >  const char *replayops = NULL;
> > >  const char *recordops = NULL;
> > ...
> > > @@ -2425,13 +2427,131 @@ out_error:
> > >  	errno = -ret;
> > >  	return -1;
> > >  }
> > > +#endif
> > > +
> > > +#ifdef URING
> > 
> > A whitespace line here...
> > 
> > > +struct io_uring ring;
> > > +#define URING_ENTRIES	1024
> > 
> > ... and here would help readability.
> > 
> > > +int
> > > +uring_setup()
> > > +{
> > > +	int ret;
> > > +
> > > +	ret = io_uring_queue_init(URING_ENTRIES, &ring, 0);
> > > +	if (ret != 0) {
> > > +		fprintf(stderr, "uring_setup: io_uring_queue_init failed: %s\n",
> > > +                        strerror(ret));
> > > +                return -1;
> > > +        }
> > > +        return 0;
> > 
> > Looks like some whitespace damage here.
> > 
> > Also, the fsstress patch has a io_uring_queue_exit() call but I don't
> > see one in this patch. Is that not needed?
> 
> There's no aio_destroy() either. I think that's because fsstress is a multi-process
> test, so it wants to destroy the io_uring or aio context when each process ends. But
> fsx is a purely single-process test, so the io_uring or aio context will be destroyed
> when fsx exits. I can add io_uring_queue_exit() and aio_destroy() if you think that
> would be better.
> 
> > 
> > > +}
> > >  
> > > -int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > > +int
> > > +__uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > 
> > Do we still need the __ in the function names here and for __aio_rw()?
> 
> I don't think it's needed. I used the "__" only because the old __aio_rw() has it. I
> can remove the "__" from both __aio_rw and __uring_rw.
> 
> > 
> > >  {
> > > +	struct io_uring_sqe	*sqe;
> > > +	struct io_uring_cqe	*cqe;
> > > +	struct iovec		iovec;
> > >  	int ret;
> > > +	int res, res2 = 0;
> > > +	char *p = buf;
> > > +	unsigned l = len;
> > > +	unsigned o = offset;
> > > +
> > > +
> > > +	/*
> > > +	 * Due to io_uring tries non-blocking IOs (especially read), that
> > > +	 * always cause 'normal' short reading. To avoid this short read
> > > +	 * fail, try to loop read/write (especially read) data.
> > > +	 */
> > > + uring_loop:
> > > +	sqe = io_uring_get_sqe(&ring);
> > > +	if (!sqe) {
> > > +		fprintf(stderr, "uring_rw: io_uring_get_sqe failed: %s\n",
> > > +		        strerror(errno));
> > > +		return -1;
> > > +        }
> > > +
> > > +	iovec.iov_base = p;
> > > +	iovec.iov_len = l;
> > > +	if (rw == READ) {
> > > +		io_uring_prep_readv(sqe, fd, &iovec, 1, o);
> > > +	} else {
> > > +		io_uring_prep_writev(sqe, fd, &iovec, 1, o);
> > > +	}
> > > +
> > > +	ret = io_uring_submit_and_wait(&ring, 1);
> > > +	if (ret != 1) {
> > > +		fprintf(stderr, "errcode=%d\n", -ret);
> > > +		fprintf(stderr, "uring %s: io_uring_submit failed: %s\n",
> > > +		        rw == READ ? "read":"write", strerror(-ret));
> > > +		goto uring_error;
> > > +	}
> > > +
> > > +	ret = io_uring_wait_cqe(&ring, &cqe);
> > > +	if (ret < 0) {
> > > +		if (ret == 0)
> > 
> > That doesn't look right since we only get here if ret < 0.
> 
> Thanks, it should be (ret <= 0)

Sorry, I just checked the io_uring_wait_cqe() code: it returns 0 on success.
So my "if (ret == 0)" check is totally wrong; I'll remove it :)

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
static inline int io_uring_wait_cqe(struct io_uring *ring,
                                    struct io_uring_cqe **cqe_ptr)

> 
> > 
> > > +			fprintf(stderr, "uring %s: no events available\n",
> > > +			        rw == READ ? "read":"write");
> > > +		else {
> > > +			fprintf(stderr, "errcode=%d\n", -ret);
> > > +			fprintf(stderr, "uring %s: io_uring_wait_cqe failed: %s\n",
> > > +			        rw == READ ? "read":"write", strerror(-ret));
> > > +		}
> > > +		goto uring_error;
> > > +	}
> > > +	res = cqe->res;
> > > +	io_uring_cqe_seen(&ring, cqe);
> > > +
> > > +	res2 += res;
> > > +	if (len != res2) {
> > > +		if (res > 0) {
> > > +			o += res;
> > > +			l -= res;
> > > +			p += res;
> > > +			if (l > 0)
> > > +				goto uring_loop;
> > > +		} else if (res < 0) {
> > > +			ret = res;
> > > +			fprintf(stderr, "errcode=%d\n", -ret);
> > > +			fprintf(stderr, "uring %s: io_uring failed: %s\n",
> > > +			        rw == READ ? "read":"write", strerror(-ret));
> > > +			goto uring_error;
> > 
> > Can we elevate the error checks into the top level rather than nesting
> > logic like this? It's a little confusing to read and it looks
> > particularly odd since we've already done res2 += res before we get
> > here.
> > 
> > Also I'm wondering if this whole function would read a little better as
> > a do {} while() loop rather than using a label and goto.
> 
> Sure, I'll try to change that.
> 
> > 
> > > +		} else {
> > > +			fprintf(stderr, "uring %s bad io length: %d instead of %u\n",
> > > +			        rw == READ ? "read":"write", res2, len);
> > > +		}
> > > +	}
> > > +	return res2;
> > > +
> > > + uring_error:
> > > +	/*
> > > +	 * The caller expects error return in traditional libc
> > > +	 * convention, i.e. -1 and the errno set to error.
> > > +	 */
> > > +	errno = -ret;
> > > +	return -1;
> > > +}
> > > +#endif
> > > +
> > > +int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > > +{
> > > +	int ret = -1;
> > >  
> > >  	if (aio) {
> > > +#ifdef AIO
> > >  		ret = __aio_rw(rw, fd, buf, len, offset);
> > > +#elif
> > > +		fprintf(stderr, "io_rw: need AIO support!\n");
> > > +		exit(111);
> > > +#endif
> > > +	} else if (uring) {
> > > +#ifdef URING
> > > +		ret = __uring_rw(rw, fd, buf, len, offset);
> > > +#elif
> > > +		fprintf(stderr, "io_rw: need IO_URING support!\n");
> > > +		exit(111);
> > > +#endif
> > 
> > I think the ifdefs would be cleaner if used to define stubbed out
> > variants of the associated functions. E.g.:
> > 
> > #ifdef URING
> > int
> > __uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > {
> > 	<do uring I/O>
> > }
> > #else
> > int
> > __uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > {
> > 	fprintf(stderr, "io_rw: need IO_URING support!\n");
> > 	exit(111);
> > }
> > #endif
> 
> Sure, will do that.
> 
> Thanks for your review, Brian!
> Zorro
> 
> > 
> > Brian
> > 
> > >  	} else {
> > >  		if (rw == READ)
> > >  			ret = read(fd, buf, len);
> > > @@ -2441,8 +2561,6 @@ int aio_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
> > >  	return ret;
> > >  }
> > >  
> > > -#endif
> > > -
> > >  #define test_fallocate(mode) __test_fallocate(mode, #mode)
> > >  
> > >  int
> > > @@ -2496,7 +2614,7 @@ main(int argc, char **argv)
> > >  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
> > >  
> > >  	while ((ch = getopt_long(argc, argv,
> > > -				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:WXZ",
> > > +				 "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> > >  				 longopts, NULL)) != EOF)
> > >  		switch (ch) {
> > >  		case 'b':
> > > @@ -2604,6 +2722,9 @@ main(int argc, char **argv)
> > >  		case 'A':
> > >  		        aio = 1;
> > >  			break;
> > > +		case 'U':
> > > +		        uring = 1;
> > > +			break;
> > >  		case 'D':
> > >  			debugstart = getnum(optarg, &endp);
> > >  			if (debugstart < 1)
> > > @@ -2694,6 +2815,11 @@ main(int argc, char **argv)
> > >  	if (argc != 1)
> > >  		usage();
> > >  
> > > +	if (aio && uring) {
> > > +		fprintf(stderr, "-A and -U shouldn't be used together\n");
> > > +		usage();
> > > +	}
> > > +
> > >  	if (integrity && !dirpath) {
> > >  		fprintf(stderr, "option -i <logdev> requires -P <dirpath>\n");
> > >  		usage();
> > > @@ -2784,6 +2910,10 @@ main(int argc, char **argv)
> > >  	if (aio) 
> > >  		aio_setup();
> > >  #endif
> > > +#ifdef URING
> > > +	if (uring)
> > > +		uring_setup();
> > > +#endif
> > >  
> > >  	if (!(o_flags & O_TRUNC)) {
> > >  		off_t ret;
> > > -- 
> > > 2.20.1
> > > 


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-09-06 16:13 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-23  6:30 [PATCH v3 0/4] fsstress,fsx: add io_uring test and do some fix Zorro Lang
2020-08-23  6:30 ` [PATCH v3 1/4] fsstress: add IO_URING read and write operations Zorro Lang
2020-09-03 12:42   ` Brian Foster
2020-09-03 14:07     ` Jens Axboe
2020-08-23  6:30 ` [PATCH v3 2/4] fsstress: reduce the number of events when io_setup Zorro Lang
2020-09-03 12:42   ` Brian Foster
2020-08-23  6:30 ` [PATCH v3 3/4] fsstress: fix memory leak in do_aio_rw Zorro Lang
2020-09-03 12:43   ` Brian Foster
2020-08-23  6:30 ` [PATCH v3 4/4] fsx: add IO_URING test Zorro Lang
2020-09-03 12:44   ` Brian Foster
2020-09-06 15:55     ` Zorro Lang
2020-09-06 16:27       ` Zorro Lang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).