All of lore.kernel.org
 help / color / mirror / Atom feed
* Recent changes (master)
@ 2021-07-29 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2021-07-29 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 9ce6f6f2bd636e9678982b86d6992ed419634c31:

  Merge branch 'evelu-fix-engines' of https://github.com/ErwanAliasr1/fio (2021-07-25 16:48:02 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 7c8e6725155cae72a0a730d3c3a36776bc5621a3:

  Makefile: update libzbc git repository (2021-07-28 07:27:29 -0600)

----------------------------------------------------------------
Damien Le Moal (1):
      Makefile: update libzbc git repository

 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 6b4b4122..5198f70e 100644
--- a/Makefile
+++ b/Makefile
@@ -651,7 +651,7 @@ test: fio
 fulltest:
 	sudo modprobe null_blk &&				 	\
 	if [ ! -e /usr/include/libzbc/zbc.h ]; then			\
-	  git clone https://github.com/hgst/libzbc &&		 	\
+	  git clone https://github.com/westerndigitalcorporation/libzbc && \
 	  (cd libzbc &&						 	\
 	   ./autogen.sh &&					 	\
 	   ./configure --prefix=/usr &&				 	\


^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-12-03 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-12-03 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 942d66c85ee8f007ea5f1097d097cf9a44b662a0:

  doc: update about size (2022-12-01 11:12:35 -0500)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 3afc2d8ac30c58372a1b7ccabaea0f3eae4ddaba:

  engines/libblkio: Share a single blkio instance among threads in same process (2022-12-02 16:24:03 -0500)

----------------------------------------------------------------
Alberto Faria (10):
      Add a libblkio engine
      Add engine flag FIO_SKIPPABLE_IOMEM_ALLOC
      engines/libblkio: Allow setting option mem/iomem
      engines/libblkio: Add support for poll queues
      engines/libblkio: Add option libblkio_vectored
      engines/libblkio: Add option libblkio_write_zeroes_on_trim
      engines/libblkio: Add option libblkio_wait_mode
      engines/libblkio: Add option libblkio_force_enable_completion_eventfd
      engines/libblkio: Add options for some driver-specific properties
      engines/libblkio: Share a single blkio instance among threads in same process

 HOWTO.rst                                 |  95 ++++
 Makefile                                  |   6 +
 configure                                 |  25 +
 engines/libblkio.c                        | 914 ++++++++++++++++++++++++++++++
 examples/libblkio-io_uring.fio            |  29 +
 examples/libblkio-virtio-blk-vfio-pci.fio |  29 +
 fio.1                                     |  78 +++
 ioengines.h                               |   2 +
 memory.c                                  |  22 +-
 optgroup.h                                |   2 +
 10 files changed, 1192 insertions(+), 10 deletions(-)
 create mode 100644 engines/libblkio.c
 create mode 100644 examples/libblkio-io_uring.fio
 create mode 100644 examples/libblkio-virtio-blk-vfio-pci.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 0aaf033a..5a5263c3 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2195,6 +2195,21 @@ I/O engine
 			the SPDK NVMe driver, or your own custom NVMe driver. The xnvme engine includes
 			engine specific options. (See https://xnvme.io).
 
+		**libblkio**
+			Use the libblkio library
+			(https://gitlab.com/libblkio/libblkio). The specific
+			*driver* to use must be set using
+			:option:`libblkio_driver`. If
+			:option:`mem`/:option:`iomem` is not specified, memory
+			allocation is delegated to libblkio (and so is
+			guaranteed to work with the selected *driver*). One
+			libblkio instance is used per process, so all jobs
+			setting option :option:`thread` will share a single
+			instance (with one queue per thread) and must specify
+			compatible options. Note that some drivers don't allow
+			several instances to access the same device or file
+			simultaneously, but allow it for threads.
+
 I/O engine specific parameters
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -2326,6 +2341,12 @@ with the caveat that when used on the command line, they must come after the
         by the application. The benefits are more efficient IO for high IOPS
         scenarios, and lower latencies for low queue depth IO.
 
+   [libblkio]
+
+	Use poll queues. This is incompatible with
+	:option:`libblkio_wait_mode=eventfd <libblkio_wait_mode>` and
+	:option:`libblkio_force_enable_completion_eventfd`.
+
    [pvsync2]
 
 	Set RWF_HIPRI on I/O, indicating to the kernel that it's of higher priority
@@ -2847,6 +2868,80 @@ with the caveat that when used on the command line, they must come after the
 
 	If this option is set. xnvme will use vectored read/write commands.
 
+.. option:: libblkio_driver=str : [libblkio]
+
+	The libblkio *driver* to use. Different drivers access devices through
+	different underlying interfaces. Available drivers depend on the
+	libblkio version in use and are listed at
+	https://libblkio.gitlab.io/libblkio/blkio.html#drivers
+
+.. option:: libblkio_path=str : [libblkio]
+
+	Sets the value of the driver-specific "path" property before connecting
+	the libblkio instance, which identifies the target device or file on
+	which to perform I/O. Its exact semantics are driver-dependent and not
+	all drivers may support it; see
+	https://libblkio.gitlab.io/libblkio/blkio.html#drivers
+
+.. option:: libblkio_pre_connect_props=str : [libblkio]
+
+	A colon-separated list of additional libblkio properties to be set after
+	creating but before connecting the libblkio instance. Each property must
+	have the format ``<name>=<value>``. Colons can be escaped as ``\:``.
+	These are set after the engine sets any other properties, so those can
+	be overridden. Available properties depend on the libblkio version in use
+	and are listed at
+	https://libblkio.gitlab.io/libblkio/blkio.html#properties
+
+.. option:: libblkio_num_entries=int : [libblkio]
+
+	Sets the value of the driver-specific "num-entries" property before
+	starting the libblkio instance. Its exact semantics are driver-dependent
+	and not all drivers may support it; see
+	https://libblkio.gitlab.io/libblkio/blkio.html#drivers
+
+.. option:: libblkio_queue_size=int : [libblkio]
+
+	Sets the value of the driver-specific "queue-size" property before
+	starting the libblkio instance. Its exact semantics are driver-dependent
+	and not all drivers may support it; see
+	https://libblkio.gitlab.io/libblkio/blkio.html#drivers
+
+.. option:: libblkio_pre_start_props=str : [libblkio]
+
+	A colon-separated list of additional libblkio properties to be set after
+	connecting but before starting the libblkio instance. Each property must
+	have the format ``<name>=<value>``. Colons can be escaped as ``\:``.
+	These are set after the engine sets any other properties, so those can
+	be overridden. Available properties depend on the libblkio version in use
+	and are listed at
+	https://libblkio.gitlab.io/libblkio/blkio.html#properties
+
+.. option:: libblkio_vectored : [libblkio]
+
+	Submit vectored read and write requests.
+
+.. option:: libblkio_write_zeroes_on_trim : [libblkio]
+
+	Submit trims as "write zeroes" requests instead of discard requests.
+
+.. option:: libblkio_wait_mode=str : [libblkio]
+
+	How to wait for completions:
+
+	**block** (default)
+		Use a blocking call to ``blkioq_do_io()``.
+	**eventfd**
+		Use a blocking call to ``read()`` on the completion eventfd.
+	**loop**
+		Use a busy loop with a non-blocking call to ``blkioq_do_io()``.
+
+.. option:: libblkio_force_enable_completion_eventfd : [libblkio]
+
+	Enable the queue's completion eventfd even when unused. This may impact
+	performance. The default is to enable it only if
+	:option:`libblkio_wait_mode=eventfd <libblkio_wait_mode>`.
+
 I/O depth
 ~~~~~~~~~
 
diff --git a/Makefile b/Makefile
index 7bd572d7..9fd8f59b 100644
--- a/Makefile
+++ b/Makefile
@@ -237,6 +237,12 @@ ifdef CONFIG_LIBXNVME
   xnvme_CFLAGS = $(LIBXNVME_CFLAGS)
   ENGINES += xnvme
 endif
+ifdef CONFIG_LIBBLKIO
+  libblkio_SRCS = engines/libblkio.c
+  libblkio_LIBS = $(LIBBLKIO_LIBS)
+  libblkio_CFLAGS = $(LIBBLKIO_CFLAGS)
+  ENGINES += libblkio
+endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \
 		oslib/linux-dev-lookup.c engines/io_uring.c engines/nvme.c
diff --git a/configure b/configure
index 1b12d268..6d8e3a87 100755
--- a/configure
+++ b/configure
@@ -176,6 +176,7 @@ libiscsi="no"
 libnbd="no"
 libnfs=""
 xnvme=""
+libblkio=""
 libzbc=""
 dfs=""
 seed_buckets=""
@@ -248,6 +249,8 @@ for opt do
   ;;
   --disable-xnvme) xnvme="no"
   ;;
+  --disable-libblkio) libblkio="no"
+  ;;
   --disable-tcmalloc) disable_tcmalloc="yes"
   ;;
   --disable-libnfs) libnfs="no"
@@ -304,6 +307,7 @@ if test "$show_help" = "yes" ; then
   echo "--enable-libiscsi       Enable iscsi support"
   echo "--enable-libnbd         Enable libnbd (NBD engine) support"
   echo "--disable-xnvme         Disable xnvme support even if found"
+  echo "--disable-libblkio      Disable libblkio support even if found"
   echo "--disable-libzbc        Disable libzbc even if found"
   echo "--disable-tcmalloc      Disable tcmalloc support"
   echo "--dynamic-libengines    Lib-based ioengines as dynamic libraries"
@@ -2663,6 +2667,22 @@ if test "$xnvme" != "no" ; then
 fi
 print_config "xnvme engine" "$xnvme"
 
+##########################################
+# Check if we have libblkio
+if test "$libblkio" != "no" ; then
+  if check_min_lib_version blkio 1.0.0; then
+    libblkio="yes"
+    libblkio_cflags=$(pkg-config --cflags blkio)
+    libblkio_libs=$(pkg-config --libs blkio)
+  else
+    if test "$libblkio" = "yes" ; then
+      feature_not_found "libblkio" "libblkio-dev or libblkio-devel"
+    fi
+    libblkio="no"
+  fi
+fi
+print_config "libblkio engine" "$libblkio"
+
 ##########################################
 # check march=armv8-a+crc+crypto
 if test "$march_armv8_a_crc_crypto" != "yes" ; then
@@ -3276,6 +3296,11 @@ if test "$xnvme" = "yes" ; then
   echo "LIBXNVME_CFLAGS=$xnvme_cflags" >> $config_host_mak
   echo "LIBXNVME_LIBS=$xnvme_libs" >> $config_host_mak
 fi
+if test "$libblkio" = "yes" ; then
+  output_sym "CONFIG_LIBBLKIO"
+  echo "LIBBLKIO_CFLAGS=$libblkio_cflags" >> $config_host_mak
+  echo "LIBBLKIO_LIBS=$libblkio_libs" >> $config_host_mak
+fi
 if test "$dynamic_engines" = "yes" ; then
   output_sym "CONFIG_DYNAMIC_ENGINES"
 fi
diff --git a/engines/libblkio.c b/engines/libblkio.c
new file mode 100644
index 00000000..054aa800
--- /dev/null
+++ b/engines/libblkio.c
@@ -0,0 +1,914 @@
+/*
+ * libblkio engine
+ *
+ * IO engine using libblkio to access various block I/O interfaces:
+ * https://gitlab.com/libblkio/libblkio
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <blkio.h>
+
+#include "../fio.h"
+#include "../optgroup.h"
+#include "../options.h"
+#include "../parse.h"
+
+/* per-process state */
+static struct {
+	pthread_mutex_t mutex;
+	int initted_threads;
+	int initted_hipri_threads;
+	struct blkio *b;
+} proc_state = { PTHREAD_MUTEX_INITIALIZER, 0, 0, NULL };
+
+static void fio_blkio_proc_lock(void) {
+	int ret;
+	ret = pthread_mutex_lock(&proc_state.mutex);
+	assert(ret == 0);
+}
+
+static void fio_blkio_proc_unlock(void) {
+	int ret;
+	ret = pthread_mutex_unlock(&proc_state.mutex);
+	assert(ret == 0);
+}
+
+/* per-thread state */
+struct fio_blkio_data {
+	struct blkioq *q;
+	int completion_fd; /* may be -1 if not FIO_BLKIO_WAIT_MODE_EVENTFD */
+
+	bool has_mem_region; /* whether mem_region is valid */
+	struct blkio_mem_region mem_region; /* only if allocated by libblkio */
+
+	struct iovec *iovecs; /* for vectored requests */
+	struct blkio_completion *completions;
+};
+
+enum fio_blkio_wait_mode {
+	FIO_BLKIO_WAIT_MODE_BLOCK,
+	FIO_BLKIO_WAIT_MODE_EVENTFD,
+	FIO_BLKIO_WAIT_MODE_LOOP,
+};
+
+struct fio_blkio_options {
+	void *pad; /* option fields must not have offset 0 */
+
+	char *driver;
+
+	char *path;
+	char *pre_connect_props;
+
+	int num_entries;
+	int queue_size;
+	char *pre_start_props;
+
+	unsigned int hipri;
+	unsigned int vectored;
+	unsigned int write_zeroes_on_trim;
+	enum fio_blkio_wait_mode wait_mode;
+	unsigned int force_enable_completion_eventfd;
+};
+
+static struct fio_option options[] = {
+	{
+		.name	= "libblkio_driver",
+		.lname	= "libblkio driver name",
+		.type	= FIO_OPT_STR_STORE,
+		.off1	= offsetof(struct fio_blkio_options, driver),
+		.help	= "Name of the driver to be used by libblkio",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_path",
+		.lname	= "libblkio \"path\" property",
+		.type	= FIO_OPT_STR_STORE,
+		.off1	= offsetof(struct fio_blkio_options, path),
+		.help	= "Value to set the \"path\" property to",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_pre_connect_props",
+		.lname	= "Additional properties to be set before blkio_connect()",
+		.type	= FIO_OPT_STR_STORE,
+		.off1	= offsetof(struct fio_blkio_options, pre_connect_props),
+		.help	= "",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_num_entries",
+		.lname	= "libblkio \"num-entries\" property",
+		.type	= FIO_OPT_INT,
+		.off1	= offsetof(struct fio_blkio_options, num_entries),
+		.help	= "Value to set the \"num-entries\" property to",
+		.minval	= 1,
+		.interval = 1,
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_queue_size",
+		.lname	= "libblkio \"queue-size\" property",
+		.type	= FIO_OPT_INT,
+		.off1	= offsetof(struct fio_blkio_options, queue_size),
+		.help	= "Value to set the \"queue-size\" property to",
+		.minval	= 1,
+		.interval = 1,
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_pre_start_props",
+		.lname	= "Additional properties to be set before blkio_start()",
+		.type	= FIO_OPT_STR_STORE,
+		.off1	= offsetof(struct fio_blkio_options, pre_start_props),
+		.help	= "",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "hipri",
+		.lname	= "Use poll queues",
+		.type	= FIO_OPT_STR_SET,
+		.off1	= offsetof(struct fio_blkio_options, hipri),
+		.help	= "Use poll queues",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_vectored",
+		.lname	= "Use blkioq_{readv,writev}()",
+		.type	= FIO_OPT_STR_SET,
+		.off1	= offsetof(struct fio_blkio_options, vectored),
+		.help	= "Use blkioq_{readv,writev}() instead of blkioq_{read,write}()",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_write_zeroes_on_trim",
+		.lname	= "Use blkioq_write_zeroes() for TRIM",
+		.type	= FIO_OPT_STR_SET,
+		.off1	= offsetof(struct fio_blkio_options,
+				   write_zeroes_on_trim),
+		.help	= "Use blkioq_write_zeroes() for TRIM instead of blkioq_discard()",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_wait_mode",
+		.lname	= "How to wait for completions",
+		.type	= FIO_OPT_STR,
+		.off1	= offsetof(struct fio_blkio_options, wait_mode),
+		.help	= "How to wait for completions",
+		.def	= "block",
+		.posval = {
+			  { .ival = "block",
+			    .oval = FIO_BLKIO_WAIT_MODE_BLOCK,
+			    .help = "Blocking blkioq_do_io()",
+			  },
+			  { .ival = "eventfd",
+			    .oval = FIO_BLKIO_WAIT_MODE_EVENTFD,
+			    .help = "Blocking read() on the completion eventfd",
+			  },
+			  { .ival = "loop",
+			    .oval = FIO_BLKIO_WAIT_MODE_LOOP,
+			    .help = "Busy loop with non-blocking blkioq_do_io()",
+			  },
+		},
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name	= "libblkio_force_enable_completion_eventfd",
+		.lname	= "Force enable the completion eventfd, even if unused",
+		.type	= FIO_OPT_STR_SET,
+		.off1	= offsetof(struct fio_blkio_options,
+				   force_enable_completion_eventfd),
+		.help	= "This can impact performance",
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_LIBBLKIO,
+	},
+	{
+		.name = NULL,
+	},
+};
+
+/*
+ * Set the colon-separated "<name>=<value>" pairs in str (from option opt_name)
+ * on b; NULL str is a no-op. Returns 0, or 1 after logging the first failure.
+ */
+static int fio_blkio_set_props_from_str(struct blkio *b, const char *opt_name,
+					const char *str) {
+	int ret = 0;
+	char *new_str, *cursor, *name, *value;
+
+	if (!str)
+		return 0;
+
+	/* iteration can mutate string, so copy it */
+	new_str = strdup(str);
+	if (!new_str) {
+		log_err("fio: strdup() failed\n");
+		return 1;
+	}
+
+	/* iterate with a cursor so new_str stays valid for free() below */
+	cursor = new_str;
+	while ((name = get_next_str(&cursor))) {
+		/* split into property name and value */
+		value = strchr(name, '=');
+		if (!value) {
+			log_err("fio: missing '=' in option %s\n", opt_name);
+			ret = 1;
+			break;
+		}
+		*value++ = '\0';
+
+		/* strip whitespace from property name */
+		strip_blank_front(&name);
+		strip_blank_end(name);
+		if (name[0] == '\0') {
+			log_err("fio: empty property name in option %s\n",
+				opt_name);
+			ret = 1;
+			break;
+		}
+
+		/* strip whitespace from property value, then set the property */
+		strip_blank_front(&value);
+		strip_blank_end(value);
+		if (blkio_set_str(b, name, value) != 0) {
+			log_err("fio: error setting property '%s' to '%s': %s\n",
+				name, value, blkio_get_error_msg());
+			ret = 1;
+			break;
+		}
+	}
+
+	free(new_str);
+	return ret;
+}
+
+/*
+ * Log the failure of a libblkio function.
+ *
+ * `(void)func` is to ensure `func` exists and prevent typos
+ */
+#define fio_blkio_log_err(func) \
+	({ \
+		(void)func; \
+		log_err("fio: %s() failed: %s\n", #func, \
+			blkio_get_error_msg()); \
+	})
+
+static bool possibly_null_strs_equal(const char *a, const char *b)
+{
+	return (!a && !b) || (a && b && strcmp(a, b) == 0);
+}
+
+/*
+ * Returns the total number of subjobs using the 'libblkio' ioengine and setting
+ * the 'thread' option in the entire workload that have the given value for the
+ * 'hipri' option.
+ */
+static int total_threaded_subjobs(bool hipri)
+{
+	struct thread_data *td;
+	unsigned int i;
+	int count = 0;
+
+	for_each_td(td, i) {
+		const struct fio_blkio_options *options = td->eo;
+		if (strcmp(td->o.ioengine, "libblkio") == 0 &&
+		    td->o.use_thread && (bool)options->hipri == hipri)
+			++count;
+	}
+
+	return count;
+}
+
+static struct {
+	bool set_up;
+	bool direct;
+	struct fio_blkio_options opts;
+} first_threaded_subjob = { 0 };
+
+static void fio_blkio_log_opt_compat_err(const char *option_name)
+{
+	log_err("fio: jobs using engine libblkio and sharing a process must agree on the %s option\n",
+		option_name);
+}
+
+/*
+ * If td is a subjob with option 'thread', check that its options match those
+ * of the first threaded subjob set up. Returns 0 if so, 1 (logged) otherwise.
+ */
+static int fio_blkio_check_opt_compat(struct thread_data *td)
+{
+	const struct fio_blkio_options *options = td->eo, *prev_options;
+
+	if (!td->o.use_thread)
+		return 0; /* subjob doesn't use 'thread' */
+
+	if (!first_threaded_subjob.set_up) {
+		/* first subjob using 'thread', store options for later */
+		first_threaded_subjob.set_up	= true;
+		first_threaded_subjob.direct	= td->o.odirect;
+		first_threaded_subjob.opts	= *options;
+		return 0;
+	}
+
+	/* not first subjob using 'thread', check option compatibility */
+	prev_options = &first_threaded_subjob.opts;
+	if (td->o.odirect != first_threaded_subjob.direct) {
+		fio_blkio_log_opt_compat_err("direct/buffered");
+		return 1;
+	}
+
+	/* driver may be unset (NULL) at this point, so avoid a bare strcmp() */
+	if (!possibly_null_strs_equal(options->driver, prev_options->driver)) {
+		fio_blkio_log_opt_compat_err("libblkio_driver");
+		return 1;
+	}
+
+	if (!possibly_null_strs_equal(options->path, prev_options->path)) {
+		fio_blkio_log_opt_compat_err("libblkio_path");
+		return 1;
+	}
+
+	if (!possibly_null_strs_equal(options->pre_connect_props,
+				      prev_options->pre_connect_props)) {
+		fio_blkio_log_opt_compat_err("libblkio_pre_connect_props");
+		return 1;
+	}
+
+	if (options->num_entries != prev_options->num_entries) {
+		fio_blkio_log_opt_compat_err("libblkio_num_entries");
+		return 1;
+	}
+
+	if (options->queue_size != prev_options->queue_size) {
+		fio_blkio_log_opt_compat_err("libblkio_queue_size");
+		return 1;
+	}
+
+	if (!possibly_null_strs_equal(options->pre_start_props,
+				      prev_options->pre_start_props)) {
+		fio_blkio_log_opt_compat_err("libblkio_pre_start_props");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int fio_blkio_create_and_connect(struct thread_data *td,
+					struct blkio **out_blkio)
+{
+	const struct fio_blkio_options *options = td->eo;
+	struct blkio *b;
+	int ret;
+
+	if (!options->driver) {
+		log_err("fio: engine libblkio requires option libblkio_driver to be set\n");
+		return 1;
+	}
+
+	if (blkio_create(options->driver, &b) != 0) {
+		fio_blkio_log_err(blkio_create);
+		return 1;
+	}
+
+	/* don't fail if driver doesn't have a "direct" property */
+	ret = blkio_set_bool(b, "direct", td->o.odirect);
+	if (ret != 0 && ret != -ENOENT) {
+		fio_blkio_log_err(blkio_set_bool);
+		goto err_blkio_destroy;
+	}
+
+	if (blkio_set_bool(b, "read-only", read_only) != 0) {
+		fio_blkio_log_err(blkio_set_bool);
+		goto err_blkio_destroy;
+	}
+
+	if (options->path) {
+		if (blkio_set_str(b, "path", options->path) != 0) {
+			fio_blkio_log_err(blkio_set_str);
+			goto err_blkio_destroy;
+		}
+	}
+
+	if (fio_blkio_set_props_from_str(b, "libblkio_pre_connect_props",
+					 options->pre_connect_props) != 0)
+		goto err_blkio_destroy;
+
+	if (blkio_connect(b) != 0) {
+		fio_blkio_log_err(blkio_connect);
+		goto err_blkio_destroy;
+	}
+
+	if (options->num_entries != 0) {
+		if (blkio_set_int(b, "num-entries",
+				  options->num_entries) != 0) {
+			fio_blkio_log_err(blkio_set_int);
+			goto err_blkio_destroy;
+		}
+	}
+
+	if (options->queue_size != 0) {
+		if (blkio_set_int(b, "queue-size", options->queue_size) != 0) {
+			fio_blkio_log_err(blkio_set_int);
+			goto err_blkio_destroy;
+		}
+	}
+
+	if (fio_blkio_set_props_from_str(b, "libblkio_pre_start_props",
+					 options->pre_start_props) != 0)
+		goto err_blkio_destroy;
+
+	*out_blkio = b;
+	return 0;
+
+err_blkio_destroy:
+	blkio_destroy(&b);
+	return 1;
+}
+
+static bool incompatible_threaded_subjob_options = false;
+
+/*
+ * This callback determines the device/file size, so it creates and connects a
+ * blkio instance. But it is invoked from the main thread in the original fio
+ * process, not from the processes in which jobs will actually run. It thus
+ * subsequently destroys the blkio, which is recreated in the init() callback.
+ */
+static int fio_blkio_setup(struct thread_data *td)
+{
+	const struct fio_blkio_options *options = td->eo;
+	struct blkio *b;
+	int ret = 0;
+	uint64_t capacity;
+
+	assert(td->files_index == 1);
+
+	if (fio_blkio_check_opt_compat(td) != 0) {
+		incompatible_threaded_subjob_options = true;
+		return 1;
+	}
+
+	if (options->hipri &&
+		options->wait_mode == FIO_BLKIO_WAIT_MODE_EVENTFD) {
+		log_err("fio: option hipri is incompatible with option libblkio_wait_mode=eventfd\n");
+		return 1;
+	}
+
+	if (options->hipri && options->force_enable_completion_eventfd) {
+		log_err("fio: option hipri is incompatible with option libblkio_force_enable_completion_eventfd\n");
+		return 1;
+	}
+
+	if (fio_blkio_create_and_connect(td, &b) != 0)
+		return 1;
+
+	if (blkio_get_uint64(b, "capacity", &capacity) != 0) {
+		fio_blkio_log_err(blkio_get_uint64);
+		ret = 1;
+		goto out_blkio_destroy;
+	}
+
+	td->files[0]->real_file_size = capacity;
+	fio_file_set_size_known(td->files[0]);
+
+out_blkio_destroy:
+	blkio_destroy(&b);
+	return ret;
+}
+
+static int fio_blkio_init(struct thread_data *td)
+{
+	const struct fio_blkio_options *options = td->eo;
+	struct fio_blkio_data *data;
+	int flags;
+
+	if (td->o.use_thread && incompatible_threaded_subjob_options) {
+		/*
+		 * Different subjobs using option 'thread' specified
+		 * incompatible options. We don't know which configuration
+		 * should win, so we just fail all such subjobs.
+		 */
+		return 1;
+	}
+
+	/*
+	 * Request enqueueing is fast, and it's not possible to know exactly
+	 * when a request is submitted, so never report submission latencies.
+	 */
+	td->o.disable_slat = 1;
+
+	data = calloc(1, sizeof(*data));
+	if (!data) {
+		log_err("fio: calloc() failed\n");
+		return 1;
+	}
+
+	data->iovecs = calloc(td->o.iodepth, sizeof(data->iovecs[0]));
+	data->completions = calloc(td->o.iodepth, sizeof(data->completions[0]));
+	if (!data->iovecs || !data->completions) {
+		log_err("fio: calloc() failed\n");
+		goto err_free;
+	}
+
+	fio_blkio_proc_lock();
+
+	if (proc_state.initted_threads == 0) {
+		/* initialize per-process blkio */
+		int num_queues, num_poll_queues;
+
+		if (td->o.use_thread) {
+			num_queues 	= total_threaded_subjobs(false);
+			num_poll_queues = total_threaded_subjobs(true);
+		} else {
+			num_queues 	= options->hipri ? 0 : 1;
+			num_poll_queues = options->hipri ? 1 : 0;
+		}
+
+		if (fio_blkio_create_and_connect(td, &proc_state.b) != 0)
+			goto err_unlock;
+
+		if (blkio_set_int(proc_state.b, "num-queues",
+				  num_queues) != 0) {
+			fio_blkio_log_err(blkio_set_int);
+			goto err_blkio_destroy;
+		}
+
+		if (blkio_set_int(proc_state.b, "num-poll-queues",
+				  num_poll_queues) != 0) {
+			fio_blkio_log_err(blkio_set_int);
+			goto err_blkio_destroy;
+		}
+
+		if (blkio_start(proc_state.b) != 0) {
+			fio_blkio_log_err(blkio_start);
+			goto err_blkio_destroy;
+		}
+	}
+
+	if (options->hipri) {
+		int i = proc_state.initted_hipri_threads;
+		data->q = blkio_get_poll_queue(proc_state.b, i);
+	} else {
+		int i = proc_state.initted_threads -
+				proc_state.initted_hipri_threads;
+		data->q = blkio_get_queue(proc_state.b, i);
+	}
+
+	if (options->wait_mode == FIO_BLKIO_WAIT_MODE_EVENTFD ||
+		options->force_enable_completion_eventfd) {
+		/* enable completion fd and make it blocking */
+		blkioq_set_completion_fd_enabled(data->q, true);
+		data->completion_fd = blkioq_get_completion_fd(data->q);
+
+		flags = fcntl(data->completion_fd, F_GETFL);
+		if (flags < 0) {
+			log_err("fio: fcntl(F_GETFL) failed: %s\n",
+				strerror(errno));
+			goto err_blkio_destroy;
+		}
+
+		if (fcntl(data->completion_fd, F_SETFL,
+			  flags & ~O_NONBLOCK) != 0) {
+			log_err("fio: fcntl(F_SETFL) failed: %s\n",
+				strerror(errno));
+			goto err_blkio_destroy;
+		}
+	} else {
+		data->completion_fd = -1;
+	}
+
+	++proc_state.initted_threads;
+	if (options->hipri)
+		++proc_state.initted_hipri_threads;
+
+	/* Set data last so cleanup() does nothing if init() fails. */
+	td->io_ops_data = data;
+
+	fio_blkio_proc_unlock();
+
+	return 0;
+
+err_blkio_destroy:
+	if (proc_state.initted_threads == 0)
+		blkio_destroy(&proc_state.b);
+err_unlock:
+	if (proc_state.initted_threads == 0)
+		proc_state.b = NULL;
+	fio_blkio_proc_unlock();
+err_free:
+	free(data->completions);
+	free(data->iovecs);
+	free(data);
+	return 1;
+}
+
+static int fio_blkio_post_init(struct thread_data *td)
+{
+	struct fio_blkio_data *data = td->io_ops_data;
+
+	if (!data->has_mem_region) {
+		/*
+		 * Memory was allocated by the fio core and not iomem_alloc(),
+		 * so we need to register it as a memory region here.
+		 *
+		 * `td->orig_buffer_size` is computed like `len` below, but then
+		 * fio can add some padding to it to make sure it is
+		 * sufficiently aligned to the page size and the mem_align
+		 * option. However, this can make it become unaligned to the
+		 * "mem-region-alignment" property in ways that the user can't
+		 * control, so we essentially recompute `td->orig_buffer_size`
+		 * here but without adding that padding.
+		 */
+
+		unsigned long long max_block_size;
+		struct blkio_mem_region region;
+
+		max_block_size = max(td->o.max_bs[DDIR_READ],
+				     max(td->o.max_bs[DDIR_WRITE],
+					 td->o.max_bs[DDIR_TRIM]));
+
+		region = (struct blkio_mem_region) {
+			.addr	= td->orig_buffer,
+			.len	= (size_t)max_block_size *
+					(size_t)td->o.iodepth,
+			.fd	= -1,
+		};
+
+		if (blkio_map_mem_region(proc_state.b, &region) != 0) {
+			fio_blkio_log_err(blkio_map_mem_region);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static void fio_blkio_cleanup(struct thread_data *td)
+{
+	struct fio_blkio_data *data = td->io_ops_data;
+
+	/*
+	 * Subjobs from different jobs can be terminated at different times, so
+	 * this callback may be invoked for one subjob while another is still
+	 * doing I/O. Those subjobs may share the process, so we must wait until
+	 * the last subjob in the process wants to clean up to actually destroy
+	 * the blkio.
+	 */
+
+	if (data) {
+		free(data->completions);
+		free(data->iovecs);
+		free(data);
+
+		fio_blkio_proc_lock();
+		if (--proc_state.initted_threads == 0) {
+			blkio_destroy(&proc_state.b);
+			proc_state.b = NULL;
+		}
+		fio_blkio_proc_unlock();
+	}
+}
+
+#define align_up(x, y) ((((x) + (y) - 1) / (y)) * (y))
+
+static int fio_blkio_iomem_alloc(struct thread_data *td, size_t size)
+{
+	struct fio_blkio_data *data = td->io_ops_data;
+	int ret;
+	uint64_t mem_region_alignment;
+
+	if (blkio_get_uint64(proc_state.b, "mem-region-alignment",
+			     &mem_region_alignment) != 0) {
+		fio_blkio_log_err(blkio_get_uint64);
+		return 1;
+	}
+
+	/* round up size to satisfy mem-region-alignment */
+	size = align_up(size, (size_t)mem_region_alignment);
+
+	fio_blkio_proc_lock();
+
+	if (blkio_alloc_mem_region(proc_state.b, &data->mem_region,
+				   size) != 0) {
+		fio_blkio_log_err(blkio_alloc_mem_region);
+		ret = 1;
+		goto out;
+	}
+
+	if (blkio_map_mem_region(proc_state.b, &data->mem_region) != 0) {
+		fio_blkio_log_err(blkio_map_mem_region);
+		ret = 1;
+		goto out_free;
+	}
+
+	td->orig_buffer = data->mem_region.addr;
+	data->has_mem_region = true;
+
+	ret = 0;
+	goto out;
+
+out_free:
+	blkio_free_mem_region(proc_state.b, &data->mem_region);
+out:
+	fio_blkio_proc_unlock();
+	return ret;
+}
+
+static void fio_blkio_iomem_free(struct thread_data *td)
+{
+	struct fio_blkio_data *data = td->io_ops_data;
+
+	if (data && data->has_mem_region) {
+		fio_blkio_proc_lock();
+		blkio_unmap_mem_region(proc_state.b, &data->mem_region);
+		blkio_free_mem_region(proc_state.b, &data->mem_region);
+		fio_blkio_proc_unlock();
+
+		data->has_mem_region = false;
+	}
+}
+
+static int fio_blkio_open_file(struct thread_data *td, struct fio_file *f)
+{
+	return 0;
+}
+
+static enum fio_q_status fio_blkio_queue(struct thread_data *td,
+					 struct io_u *io_u)
+{
+	const struct fio_blkio_options *options = td->eo;
+	struct fio_blkio_data *data = td->io_ops_data;
+
+	fio_ro_check(td, io_u);
+
+	switch (io_u->ddir) {
+		case DDIR_READ:
+			if (options->vectored) {
+				struct iovec *iov = &data->iovecs[io_u->index];
+				iov->iov_base = io_u->xfer_buf;
+				iov->iov_len = (size_t)io_u->xfer_buflen;
+
+				blkioq_readv(data->q, io_u->offset, iov, 1,
+					     io_u, 0);
+			} else {
+				blkioq_read(data->q, io_u->offset,
+					    io_u->xfer_buf,
+					    (size_t)io_u->xfer_buflen, io_u, 0);
+			}
+			break;
+		case DDIR_WRITE:
+			if (options->vectored) {
+				struct iovec *iov = &data->iovecs[io_u->index];
+				iov->iov_base = io_u->xfer_buf;
+				iov->iov_len = (size_t)io_u->xfer_buflen;
+
+				blkioq_writev(data->q, io_u->offset, iov, 1,
+					      io_u, 0);
+			} else {
+				blkioq_write(data->q, io_u->offset,
+					     io_u->xfer_buf,
+					     (size_t)io_u->xfer_buflen, io_u,
+					     0);
+			}
+			break;
+		case DDIR_TRIM:
+			if (options->write_zeroes_on_trim) {
+				blkioq_write_zeroes(data->q, io_u->offset,
+						    io_u->xfer_buflen, io_u, 0);
+			} else {
+				blkioq_discard(data->q, io_u->offset,
+					       io_u->xfer_buflen, io_u, 0);
+			}
+		        break;
+		case DDIR_SYNC:
+		case DDIR_DATASYNC:
+			blkioq_flush(data->q, io_u, 0);
+			break;
+		default:
+			io_u->error = ENOTSUP;
+			io_u_log_error(td, io_u);
+			return FIO_Q_COMPLETED;
+	}
+
+	return FIO_Q_QUEUED;
+}
+
+static int fio_blkio_getevents(struct thread_data *td, unsigned int min,
+			       unsigned int max, const struct timespec *t)
+{
+	const struct fio_blkio_options *options = td->eo;
+	struct fio_blkio_data *data = td->io_ops_data;
+	int ret, n;
+	uint64_t event;
+
+	switch (options->wait_mode) {
+	case FIO_BLKIO_WAIT_MODE_BLOCK:
+		n = blkioq_do_io(data->q, data->completions, (int)min, (int)max,
+				 NULL);
+		if (n < 0) {
+			fio_blkio_log_err(blkioq_do_io);
+			return -1;
+		}
+		return n;
+	case FIO_BLKIO_WAIT_MODE_EVENTFD:
+		n = blkioq_do_io(data->q, data->completions, 0, (int)max, NULL);
+		if (n < 0) {
+			fio_blkio_log_err(blkioq_do_io);
+			return -1;
+		}
+		while (n < (int)min) {
+			ret = read(data->completion_fd, &event, sizeof(event));
+			if (ret != sizeof(event)) {
+				log_err("fio: read() on the completion fd returned %d\n",
+					ret);
+				return -1;
+			}
+
+			ret = blkioq_do_io(data->q, data->completions + n, 0,
+					   (int)max - n, NULL);
+			if (ret < 0) {
+				fio_blkio_log_err(blkioq_do_io);
+				return -1;
+			}
+
+			n += ret;
+		}
+		return n;
+	case FIO_BLKIO_WAIT_MODE_LOOP:
+		for (n = 0; n < (int)min; ) {
+			ret = blkioq_do_io(data->q, data->completions + n, 0,
+					   (int)max - n, NULL);
+			if (ret < 0) {
+				fio_blkio_log_err(blkioq_do_io);
+				return -1;
+			}
+
+			n += ret;
+		}
+		return n;
+	default:
+		return -1;
+	}
+}
+
+static struct io_u *fio_blkio_event(struct thread_data *td, int event)
+{
+	struct fio_blkio_data *data = td->io_ops_data;
+	struct blkio_completion *completion = &data->completions[event];
+	struct io_u *io_u = completion->user_data;
+
+	io_u->error = -completion->ret;
+
+	return io_u;
+}
+
+FIO_STATIC struct ioengine_ops ioengine = {
+	.name			= "libblkio",
+	.version		= FIO_IOOPS_VERSION,
+	.flags			= FIO_DISKLESSIO | FIO_NOEXTEND |
+				  FIO_NO_OFFLOAD | FIO_SKIPPABLE_IOMEM_ALLOC,
+
+	.setup			= fio_blkio_setup,
+	.init			= fio_blkio_init,
+	.post_init		= fio_blkio_post_init,
+	.cleanup		= fio_blkio_cleanup,
+
+	.iomem_alloc		= fio_blkio_iomem_alloc,
+	.iomem_free		= fio_blkio_iomem_free,
+
+	.open_file		= fio_blkio_open_file,
+
+	.queue			= fio_blkio_queue,
+	.getevents		= fio_blkio_getevents,
+	.event			= fio_blkio_event,
+
+	.options		= options,
+	.option_struct_size	= sizeof(struct fio_blkio_options),
+};
+
+static void fio_init fio_blkio_register(void)
+{
+	register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_blkio_unregister(void)
+{
+	unregister_ioengine(&ioengine);
+}
diff --git a/examples/libblkio-io_uring.fio b/examples/libblkio-io_uring.fio
new file mode 100644
index 00000000..40f625cf
--- /dev/null
+++ b/examples/libblkio-io_uring.fio
@@ -0,0 +1,29 @@
+; Benchmark accessing a regular file or block device using libblkio.
+;
+; Replace "/dev/nvme0n1" below with the path to your file or device, or override
+; it by passing the '--libblkio_path=...' flag to fio.
+;
+; In the example below, the two subjobs of "job-B" *and* the single subjob of
+; "job-C" will share a single libblkio instance, and "job-A" will use a separate
+; libblkio instance.
+;
+; For information on libblkio, see: https://gitlab.com/libblkio/libblkio
+
+[global]
+ioengine=libblkio
+libblkio_driver=io_uring
+libblkio_path=/dev/nvme0n1  ; REPLACE THIS WITH THE RIGHT PATH
+rw=randread
+blocksize=4k
+direct=1
+time_based=1
+runtime=10s
+
+[job-A]
+
+[job-B]
+numjobs=2  ; run two copies of this job simultaneously
+thread=1   ; have each copy run as a separate thread in the *same* process
+
+[job-C]
+thread=1  ; have the job run as a thread in the *same* process as "job-B"
diff --git a/examples/libblkio-virtio-blk-vfio-pci.fio b/examples/libblkio-virtio-blk-vfio-pci.fio
new file mode 100644
index 00000000..024224a6
--- /dev/null
+++ b/examples/libblkio-virtio-blk-vfio-pci.fio
@@ -0,0 +1,29 @@
+; Benchmark accessing a PCI virtio-blk device using libblkio.
+;
+; Replace "/sys/bus/pci/devices/0000:00:01.0" below with the path to your
+; device's sysfs directory, or override it by passing the '--libblkio_path=...'
+; flag to fio.
+;
+; In the example below, the two subjobs of "job-B" *and* the single subjob of
+; "job-C" will share a single libblkio instance, and "job-A" will use a separate
+; libblkio instance.
+;
+; For information on libblkio, see: https://gitlab.com/libblkio/libblkio
+
+[global]
+ioengine=libblkio
+libblkio_driver=virtio-blk-vfio-pci
+libblkio_path=/sys/bus/pci/devices/0000:00:01.0  ; REPLACE THIS WITH THE RIGHT PATH
+rw=randread
+blocksize=4k
+time_based=1
+runtime=10s
+
+[job-A]
+
+[job-B]
+numjobs=2  ; run two copies of this job simultaneously
+thread=1   ; have each copy run as a separate thread in the *same* process
+
+[job-C]
+thread=1  ; have the job run as a thread in the *same* process as "job-B"
diff --git a/fio.1 b/fio.1
index 62af0bd2..7a153731 100644
--- a/fio.1
+++ b/fio.1
@@ -1992,6 +1992,16 @@ I/O engine using the xNVMe C API, for NVMe devices. The xnvme engine provides
 flexibility to access GNU/Linux Kernel NVMe driver via libaio, IOCTLs, io_uring,
 the SPDK NVMe driver, or your own custom NVMe driver. The xnvme engine includes
 engine specific options. (See \fIhttps://xnvme.io/\fR).
+.TP
+.B libblkio
+Use the libblkio library (\fIhttps://gitlab.com/libblkio/libblkio\fR). The
+specific driver to use must be set using \fBlibblkio_driver\fR. If
+\fBmem\fR/\fBiomem\fR is not specified, memory allocation is delegated to
+libblkio (and so is guaranteed to work with the selected driver). One libblkio
+instance is used per process, so all jobs setting option \fBthread\fR will share
+a single instance (with one queue per thread) and must specify compatible
+options. Note that some drivers don't allow several instances to access the same
+device or file simultaneously, but allow it for threads.
 .SS "I/O engine specific parameters"
 In addition, there are some parameters which are only valid when a specific
 \fBioengine\fR is in use. These are used identically to normal parameters,
@@ -2604,6 +2614,74 @@ xnvme namespace identifier for userspace NVMe driver such as SPDK.
 .TP
 .BI (xnvme)xnvme_iovec
 If this option is set, xnvme will use vectored read/write commands.
+.TP
+.BI (libblkio)libblkio_driver \fR=\fPstr
+The libblkio driver to use. Different drivers access devices through different
+underlying interfaces. Available drivers depend on the libblkio version in use
+and are listed at \fIhttps://libblkio.gitlab.io/libblkio/blkio.html#drivers\fR
+.TP
+.BI (libblkio)libblkio_path \fR=\fPstr
+Sets the value of the driver-specific "path" property before connecting the
+libblkio instance, which identifies the target device or file on which to
+perform I/O. Its exact semantics are driver-dependent and not all drivers may
+support it; see \fIhttps://libblkio.gitlab.io/libblkio/blkio.html#drivers\fR
+.TP
+.BI (libblkio)libblkio_pre_connect_props \fR=\fPstr
+A colon-separated list of additional libblkio properties to be set after
+creating but before connecting the libblkio instance. Each property must have
+the format \fB<name>=<value>\fR. Colons can be escaped as \fB\\:\fR. These are
+set after the engine sets any other properties, so those can be overridden.
+Available properties depend on the libblkio version in use and are listed at
+\fIhttps://libblkio.gitlab.io/libblkio/blkio.html#properties\fR
+.TP
+.BI (libblkio)libblkio_num_entries \fR=\fPint
+Sets the value of the driver-specific "num-entries" property before starting the
+libblkio instance. Its exact semantics are driver-dependent and not all drivers
+may support it; see \fIhttps://libblkio.gitlab.io/libblkio/blkio.html#drivers\fR
+.TP
+.BI (libblkio)libblkio_queue_size \fR=\fPint
+Sets the value of the driver-specific "queue-size" property before starting the
+libblkio instance. Its exact semantics are driver-dependent and not all drivers
+may support it; see \fIhttps://libblkio.gitlab.io/libblkio/blkio.html#drivers\fR
+.TP
+.BI (libblkio)libblkio_pre_start_props \fR=\fPstr
+A colon-separated list of additional libblkio properties to be set after
+connecting but before starting the libblkio instance. Each property must have
+the format \fB<name>=<value>\fR. Colons can be escaped as \fB\\:\fR. These are
+set after the engine sets any other properties, so those can be overridden.
+Available properties depend on the libblkio version in use and are listed at
+\fIhttps://libblkio.gitlab.io/libblkio/blkio.html#properties\fR
+.TP
+.BI (libblkio)hipri
+Use poll queues. This is incompatible with \fBlibblkio_wait_mode=eventfd\fR and
+\fBlibblkio_force_enable_completion_eventfd\fR.
+.TP
+.BI (libblkio)libblkio_vectored
+Submit vectored read and write requests.
+.TP
+.BI (libblkio)libblkio_write_zeroes_on_trim
+Submit trims as "write zeroes" requests instead of discard requests.
+.TP
+.BI (libblkio)libblkio_wait_mode \fR=\fPstr
+How to wait for completions:
+.RS
+.RS
+.TP
+.B block \fR(default)
+Use a blocking call to \fBblkioq_do_io()\fR.
+.TP
+.B eventfd
+Use a blocking call to \fBread()\fR on the completion eventfd.
+.TP
+.B loop
+Use a busy loop with a non-blocking call to \fBblkioq_do_io()\fR.
+.RE
+.RE
+.TP
+.BI (libblkio)libblkio_force_enable_completion_eventfd
+Enable the queue's completion eventfd even when unused. This may impact
+performance. The default is to enable it only if
+\fBlibblkio_wait_mode=eventfd\fR.
 .SS "I/O depth"
 .TP
 .BI iodepth \fR=\fPint
diff --git a/ioengines.h b/ioengines.h
index 11d2115c..d43540d0 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -87,6 +87,8 @@ enum fio_ioengine_flags {
 	FIO_NO_OFFLOAD	= 1 << 15,	/* no async offload */
 	FIO_ASYNCIO_SETS_ISSUE_TIME
 			= 1 << 16,	/* async ioengine with commit function that sets issue_time */
+	FIO_SKIPPABLE_IOMEM_ALLOC
+			= 1 << 17,	/* skip iomem_alloc & iomem_free if job sets mem/iomem */
 };
 
 /*
diff --git a/memory.c b/memory.c
index 6cf73333..577d3dd5 100644
--- a/memory.c
+++ b/memory.c
@@ -305,16 +305,18 @@ int allocate_io_mem(struct thread_data *td)
 	dprint(FD_MEM, "Alloc %llu for buffers\n", (unsigned long long) total_mem);
 
 	/*
-	 * If the IO engine has hooks to allocate/free memory, use those. But
-	 * error out if the user explicitly asked for something else.
+	 * If the IO engine has hooks to allocate/free memory and the user
+	 * doesn't explicitly ask for something else, use those. But fail if the
+	 * user asks for something else with an engine that doesn't allow that.
 	 */
-	if (td->io_ops->iomem_alloc) {
-		if (fio_option_is_set(&td->o, mem_type)) {
-			log_err("fio: option 'mem/iomem' conflicts with specified IO engine\n");
-			ret = 1;
-		} else
-			ret = td->io_ops->iomem_alloc(td, total_mem);
-	} else if (td->o.mem_type == MEM_MALLOC)
+	if (td->io_ops->iomem_alloc && fio_option_is_set(&td->o, mem_type) &&
+	    !td_ioengine_flagged(td, FIO_SKIPPABLE_IOMEM_ALLOC)) {
+		log_err("fio: option 'mem/iomem' conflicts with specified IO engine\n");
+		ret = 1;
+	} else if (td->io_ops->iomem_alloc &&
+		   !fio_option_is_set(&td->o, mem_type))
+		ret = td->io_ops->iomem_alloc(td, total_mem);
+	else if (td->o.mem_type == MEM_MALLOC)
 		ret = alloc_mem_malloc(td, total_mem);
 	else if (td->o.mem_type == MEM_SHM || td->o.mem_type == MEM_SHMHUGE)
 		ret = alloc_mem_shm(td, total_mem);
@@ -342,7 +344,7 @@ void free_io_mem(struct thread_data *td)
 	if (td->o.odirect || td->o.oatomic)
 		total_mem += page_mask;
 
-	if (td->io_ops->iomem_alloc) {
+	if (td->io_ops->iomem_alloc && !fio_option_is_set(&td->o, mem_type)) {
 		if (td->io_ops->iomem_free)
 			td->io_ops->iomem_free(td);
 	} else if (td->o.mem_type == MEM_MALLOC)
diff --git a/optgroup.h b/optgroup.h
index dc73c8f3..024b902f 100644
--- a/optgroup.h
+++ b/optgroup.h
@@ -73,6 +73,7 @@ enum opt_category_group {
 	__FIO_OPT_G_NFS,
 	__FIO_OPT_G_WINDOWSAIO,
 	__FIO_OPT_G_XNVME,
+	__FIO_OPT_G_LIBBLKIO,
 
 	FIO_OPT_G_RATE		= (1ULL << __FIO_OPT_G_RATE),
 	FIO_OPT_G_ZONE		= (1ULL << __FIO_OPT_G_ZONE),
@@ -120,6 +121,7 @@ enum opt_category_group {
 	FIO_OPT_G_DFS		= (1ULL << __FIO_OPT_G_DFS),
 	FIO_OPT_G_WINDOWSAIO	= (1ULL << __FIO_OPT_G_WINDOWSAIO),
 	FIO_OPT_G_XNVME         = (1ULL << __FIO_OPT_G_XNVME),
+	FIO_OPT_G_LIBBLKIO	= (1ULL << __FIO_OPT_G_LIBBLKIO),
 };
 
 extern const struct opt_group *opt_group_from_mask(uint64_t *mask);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-12-02 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-12-02 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6d8fe6e847bb43cf7db5eee4cf58fd490f12be47:

  backend: respect return value of init_io_u_buffers (2022-11-30 19:58:34 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 942d66c85ee8f007ea5f1097d097cf9a44b662a0:

  doc: update about size (2022-12-01 11:12:35 -0500)

----------------------------------------------------------------
Ankit Kumar (1):
      doc: update about size

 HOWTO.rst | 7 +++++--
 fio.1     | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 2ea84558..0aaf033a 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1875,8 +1875,11 @@ I/O size
 .. option:: size=int
 
 	The total size of file I/O for each thread of this job. Fio will run until
-	this many bytes has been transferred, unless runtime is limited by other options
-	(such as :option:`runtime`, for instance, or increased/decreased by :option:`io_size`).
+	this many bytes has been transferred, unless runtime is altered by other means
+	such as (1) :option:`runtime`, (2) :option:`io_size`, (3) :option:`number_ios`,
+	(4) gaps/holes while doing I/O's such as ``rw=read:16K``, or (5) sequential
+	I/O reaching end of the file which is possible when :option:`percentage_random`
+	is less than 100.
 	Fio will divide this size between the available files determined by options
 	such as :option:`nrfiles`, :option:`filename`, unless :option:`filesize` is
 	specified by the job. If the result of division happens to be 0, the size is
diff --git a/fio.1 b/fio.1
index 746c4472..62af0bd2 100644
--- a/fio.1
+++ b/fio.1
@@ -1676,8 +1676,11 @@ simulate a smaller amount of memory. The amount specified is per worker.
 .TP
 .BI size \fR=\fPint[%|z]
 The total size of file I/O for each thread of this job. Fio will run until
-this many bytes has been transferred, unless runtime is limited by other options
-(such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
+this many bytes has been transferred, unless runtime is altered by other means
+such as (1) \fBruntime\fR, (2) \fBio_size\fR, (3) \fBnumber_ios\fR, (4)
+gaps/holes while doing I/O's such as `rw=read:16K', or (5) sequential I/O
+reaching end of the file which is possible when \fBpercentage_random\fR is
+less than 100.
 Fio will divide this size between the available files determined by options
 such as \fBnrfiles\fR, \fBfilename\fR, unless \fBfilesize\fR is
 specified by the job. If the result of division happens to be 0, the size is

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-12-01 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-12-01 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 967c5441fa3d3932ec50ea5623411cc6e8589463:

  docs: description for experimental_verify (2022-11-29 17:09:41 -0500)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6d8fe6e847bb43cf7db5eee4cf58fd490f12be47:

  backend: respect return value of init_io_u_buffers (2022-11-30 19:58:34 -0700)

----------------------------------------------------------------
Shin'ichiro Kawasaki (1):
      backend: respect return value of init_io_u_buffers

 backend.c  | 3 ++-
 blktrace.c | 3 ++-
 iolog.c    | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index ba954a6b..928e524a 100644
--- a/backend.c
+++ b/backend.c
@@ -1301,7 +1301,8 @@ static int init_io_u(struct thread_data *td)
 		}
 	}
 
-	init_io_u_buffers(td);
+	if (init_io_u_buffers(td))
+		return 1;
 
 	if (init_file_completion_logging(td, max_units))
 		return 1;
diff --git a/blktrace.c b/blktrace.c
index 00e5f9a9..d5c8aee7 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -545,7 +545,8 @@ bool read_blktrace(struct thread_data* td)
 			td->o.max_bs[DDIR_TRIM] = max(td->o.max_bs[DDIR_TRIM], rw_bs[DDIR_TRIM]);
 			io_u_quiesce(td);
 			free_io_mem(td);
-			init_io_u_buffers(td);
+			if (init_io_u_buffers(td))
+				return false;
 		}
 		return true;
 	}
diff --git a/iolog.c b/iolog.c
index aa9c3bb1..62f2f524 100644
--- a/iolog.c
+++ b/iolog.c
@@ -620,7 +620,8 @@ static bool read_iolog(struct thread_data *td)
 		{
 			io_u_quiesce(td);
 			free_io_mem(td);
-			init_io_u_buffers(td);
+			if (init_io_u_buffers(td))
+				return false;
 		}
 		return true;
 	}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-30 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-30 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 01fd7497328f55622ec989a8edb015f2cccb94eb:

  Merge branch 'lintian-manpage-fixes' of https://github.com/hoexter/fio (2022-11-28 12:54:53 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 967c5441fa3d3932ec50ea5623411cc6e8589463:

  docs: description for experimental_verify (2022-11-29 17:09:41 -0500)

----------------------------------------------------------------
Vincent Fu (2):
      docs: synchronize fio.1 and HOWTO changes
      docs: description for experimental_verify

 HOWTO.rst | 11 +++++++----
 fio.1     |  6 ++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 4419ee1b..2ea84558 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1054,7 +1054,7 @@ Target file/device
 
 	When running a random write test across an entire drive many more
 	zones will be open than in a typical application workload. Hence this
-	command line option that allows to limit the number of open zones. The
+	command line option that allows one to limit the number of open zones. The
 	number of open zones is defined as the number of zones to which write
 	commands are issued.
 
@@ -1446,7 +1446,7 @@ I/O type
 	supplied as a value between 0 and 100.
 
 	The second, optional float is allowed for **pareto**, **zipf** and **normal** distributions.
-	It allows to set base of distribution in non-default place, giving more control
+	It allows one to set base of distribution in non-default place, giving more control
 	over most probable outcome. This value is in range [0-1] which maps linearly to
 	range of possible random values.
 	Defaults are: random for **pareto** and **zipf**, and 0.5 for **normal**.
@@ -3612,7 +3612,10 @@ Verification
 
 .. option:: experimental_verify=bool
 
-	Enable experimental verification.
+        Enable experimental verification. Standard verify records I/O metadata
+        for later use during the verification phase. Experimental verify
+        instead resets the file after the write phase and then replays I/Os for
+        the verification phase.
 
 Steady state
 ~~~~~~~~~~~~
@@ -4503,7 +4506,7 @@ Trace file format v2
 ~~~~~~~~~~~~~~~~~~~~
 
 The second version of the trace file format was added in fio version 1.17.  It
-allows to access more than one file per trace and has a bigger set of possible
+allows one to access more than one file per trace and has a bigger set of possible
 file actions.
 
 The first line of the trace file has to be::
diff --git a/fio.1 b/fio.1
index a28ec032..746c4472 100644
--- a/fio.1
+++ b/fio.1
@@ -3324,7 +3324,9 @@ Verify that trim/discarded blocks are returned as zeros.
 Trim this number of I/O blocks.
 .TP
 .BI experimental_verify \fR=\fPbool
-Enable experimental verification.
+Enable experimental verification. Standard verify records I/O metadata for
+later use during the verification phase. Experimental verify instead resets the
+file after the write phase and then replays I/Os for the verification phase.
 .SS "Steady state"
 .TP
 .BI steadystate \fR=\fPstr:float "\fR,\fP ss" \fR=\fPstr:float
@@ -4213,7 +4215,7 @@ This format is not supported in fio versions >= 1.20\-rc3.
 .TP
 .B Trace file format v2
 The second version of the trace file format was added in fio version 1.17. It
-allows one to access more then one file per trace and has a bigger set of possible
+allows one to access more than one file per trace and has a bigger set of possible
 file actions.
 .RS
 .P

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-29 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-29 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 72044c66ac7055a98c9b3021c298c81849e3c990:

  doc: update about sqthread_poll (2022-11-23 14:06:03 -0500)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 01fd7497328f55622ec989a8edb015f2cccb94eb:

  Merge branch 'lintian-manpage-fixes' of https://github.com/hoexter/fio (2022-11-28 12:54:53 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'lintian-manpage-fixes' of https://github.com/hoexter/fio

Sven Hoexter (2):
      Spelling: Fix allows to -> allows one to in man 1 fio
      Use correct backslash escape in man 1 fio

 fio.1 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/fio.1 b/fio.1
index a156bf5d..a28ec032 100644
--- a/fio.1
+++ b/fio.1
@@ -569,7 +569,7 @@ by this option will be \fBsize\fR divided by number of files unless an
 explicit size is specified by \fBfilesize\fR.
 .RS
 .P
-Each colon in the wanted path must be escaped with a '\\'
+Each colon in the wanted path must be escaped with a '\e'
 character. For instance, if the path is `/dev/dsk/foo@3,0:c' then you
 would use `filename=/dev/dsk/foo@3,0\\:c' and if the path is
 `F:\\filename' then you would use `filename=F\\:\\filename'.
@@ -830,7 +830,7 @@ so. Default: false.
 .BI max_open_zones \fR=\fPint
 When running a random write test across an entire drive many more zones will be
 open than in a typical application workload. Hence this command line option
-that allows to limit the number of open zones. The number of open zones is
+that allows one to limit the number of open zones. The number of open zones is
 defined as the number of zones to which write commands are issued by all
 threads/processes.
 .TP
@@ -1224,7 +1224,7 @@ map. For the \fBnormal\fR distribution, a normal (Gaussian) deviation is
 supplied as a value between 0 and 100.
 .P
 The second, optional float is allowed for \fBpareto\fR, \fBzipf\fR and \fBnormal\fR
-distributions. It allows to set base of distribution in non-default place, giving
+distributions. It allows one to set base of distribution in non-default place, giving
 more control over most probable outcome. This value is in range [0-1] which maps linearly to
 range of possible random values.
 Defaults are: random for \fBpareto\fR and \fBzipf\fR, and 0.5 for \fBnormal\fR.
@@ -4213,7 +4213,7 @@ This format is not supported in fio versions >= 1.20\-rc3.
 .TP
 .B Trace file format v2
 The second version of the trace file format was added in fio version 1.17. It
-allows to access more then one file per trace and has a bigger set of possible
+allows one to access more then one file per trace and has a bigger set of possible
 file actions.
 .RS
 .P

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-24 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-24 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit ede04c27b618842e32b2a3349672f6b59a1697e1:

  test: add large pattern test (2022-11-18 19:36:10 -0500)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 72044c66ac7055a98c9b3021c298c81849e3c990:

  doc: update about sqthread_poll (2022-11-23 14:06:03 -0500)

----------------------------------------------------------------
Ankit Kumar (2):
      engines:io_uring: fix clat calculation for sqthread poll
      doc: update about sqthread_poll

Jens Axboe (1):
      Merge branch 'patch-1' of https://github.com/chienfuchen32/fio

chienfuchen32 (1):
      update documentation typo

 HOWTO.rst          |  6 ++++--
 engines/io_uring.c | 20 ++++++++++++++++++++
 fio.1              |  4 +++-
 3 files changed, 27 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index e796f961..4419ee1b 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2299,7 +2299,9 @@ with the caveat that when used on the command line, they must come after the
 	kernel of available items in the SQ ring. If this option is set, the
 	act of submitting IO will be done by a polling thread in the kernel.
 	This frees up cycles for fio, at the cost of using more CPU in the
-	system.
+	system. As submission is just the time it takes to fill in the sqe
+	entries and any syscall required to wake up the idle kernel thread,
+	fio will not report submission latencies.
 
 .. option:: sqthread_poll_cpu=int : [io_uring] [io_uring_cmd]
 
@@ -4501,7 +4503,7 @@ Trace file format v2
 ~~~~~~~~~~~~~~~~~~~~
 
 The second version of the trace file format was added in fio version 1.17.  It
-allows to access more then one file per trace and has a bigger set of possible
+allows to access more than one file per trace and has a bigger set of possible
 file actions.
 
 The first line of the trace file has to be::
diff --git a/engines/io_uring.c b/engines/io_uring.c
index 3c656b77..a9abd11d 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -637,12 +637,16 @@ static int fio_ioring_commit(struct thread_data *td)
 	 */
 	if (o->sqpoll_thread) {
 		struct io_sq_ring *ring = &ld->sq_ring;
+		unsigned start = *ld->sq_ring.head;
 		unsigned flags;
 
 		flags = atomic_load_acquire(ring->flags);
 		if (flags & IORING_SQ_NEED_WAKEUP)
 			io_uring_enter(ld, ld->queued, 0,
 					IORING_ENTER_SQ_WAKEUP);
+		fio_ioring_queued(td, start, ld->queued);
+		io_u_mark_submit(td, ld->queued);
+
 		ld->queued = 0;
 		return 0;
 	}
@@ -804,6 +808,14 @@ static int fio_ioring_queue_init(struct thread_data *td)
 			p.flags |= IORING_SETUP_SQ_AFF;
 			p.sq_thread_cpu = o->sqpoll_cpu;
 		}
+
+		/*
+		 * Submission latency for sqpoll_thread is just the time it
+		 * takes to fill in the SQ ring entries, and any syscall if
+		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
+		 * separately.
+		 */
+		td->o.disable_slat = 1;
 	}
 
 	/*
@@ -876,6 +888,14 @@ static int fio_ioring_cmd_queue_init(struct thread_data *td)
 			p.flags |= IORING_SETUP_SQ_AFF;
 			p.sq_thread_cpu = o->sqpoll_cpu;
 		}
+
+		/*
+		 * Submission latency for sqpoll_thread is just the time it
+		 * takes to fill in the SQ ring entries, and any syscall if
+		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
+		 * separately.
+		 */
+		td->o.disable_slat = 1;
 	}
 	if (o->cmd_type == FIO_URING_CMD_NVME) {
 		p.flags |= IORING_SETUP_SQE128;
diff --git a/fio.1 b/fio.1
index 9e33c9e1..a156bf5d 100644
--- a/fio.1
+++ b/fio.1
@@ -2090,7 +2090,9 @@ sqthread_poll option.
 Normally fio will submit IO by issuing a system call to notify the kernel of
 available items in the SQ ring. If this option is set, the act of submitting IO
 will be done by a polling thread in the kernel. This frees up cycles for fio, at
-the cost of using more CPU in the system.
+the cost of using more CPU in the system. As submission is just the time it
+takes to fill in the sqe entries and any syscall required to wake up the idle
+kernel thread, fio will not report submission latencies.
 .TP
 .BI (io_uring,io_uring_cmd)sqthread_poll_cpu \fR=\fPint
 When `sqthread_poll` is set, this option provides a way to define which CPU

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-19 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-19 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 07c8fe21021681f86fbfd3c3d63b88a5ebd4e557:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-11-14 08:47:00 -0500)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to ede04c27b618842e32b2a3349672f6b59a1697e1:

  test: add large pattern test (2022-11-18 19:36:10 -0500)

----------------------------------------------------------------
Logan Gunthorpe (6):
      cconv: Support pattern buffers of arbitrary size
      lib/pattern: Support NULL output buffer in parse_and_fill_pattern()
      lib/pattern: Support short repeated read calls when loading from file
      options: Support arbitrarily long pattern buffers
      lib/pattern: Support binary pattern buffers on windows
      test: add large pattern test

Shin'ichiro Kawasaki (13):
      oslib: blkzoned: add blkzoned_finish_zone() helper function
      engines/libzbc: add libzbc_finish_zone() helper function
      zbd: add zbd_zone_remainder() helper function
      zbd: finish zones with remainder smaller than minimum write block size
      zbd: allow block size not divisor of zone size
      zbd, verify: verify before zone reset for zone_reset_threshold/frequency
      zbd: fix zone reset condition for verify
      zbd: prevent experimental verify with zonemode=zbd
      t/zbd: fix test case #33 for block size unaligned to zone size
      t/zbd: modify test case #34 for block size unaligned to zone size
      t/zbd: add test case to check zone_reset_threshold/frequency with verify
      t/zbd: remove experimental_verify option from test case #54
      t/zbd: add test case to check experimental_verify option

 cconv.c                |  86 +++++++++++++++++-------
 client.c               |  17 +++--
 engines/libzbc.c       |  34 ++++++++++
 gclient.c              |  12 +++-
 ioengines.h            |   2 +
 lib/pattern.c          | 100 +++++++++++++++++++++++-----
 lib/pattern.h          |  21 ++++--
 options.c              |  10 +--
 oslib/blkzoned.h       |   8 +++
 oslib/linux-blkzoned.c |  37 +++++++++++
 server.c               |  23 ++++---
 server.h               |   2 +-
 stat.h                 |   1 -
 t/jobs/t0027.fio       |  14 ++++
 t/run-fio-tests.py     |  29 ++++++++
 t/zbd/test-zbd-support |  60 +++++++++++++----
 thread_options.h       |  15 +++--
 verify.c               |   6 +-
 zbd.c                  | 175 +++++++++++++++++++++++++++++++++----------------
 zbd.h                  |   2 -
 20 files changed, 507 insertions(+), 147 deletions(-)
 create mode 100644 t/jobs/t0027.fio

---

Diff of recent changes:

diff --git a/cconv.c b/cconv.c
index 6c36afb7..d755844f 100644
--- a/cconv.c
+++ b/cconv.c
@@ -48,14 +48,24 @@ static void free_thread_options_to_cpu(struct thread_options *o)
 	free(o->profile);
 	free(o->cgroup);
 
+	free(o->verify_pattern);
+	free(o->buffer_pattern);
+
 	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		free(o->bssplit[i]);
 		free(o->zone_split[i]);
 	}
 }
 
-void convert_thread_options_to_cpu(struct thread_options *o,
-				   struct thread_options_pack *top)
+size_t thread_options_pack_size(struct thread_options *o)
+{
+	return sizeof(struct thread_options_pack) + o->verify_pattern_bytes +
+		o->buffer_pattern_bytes;
+}
+
+int convert_thread_options_to_cpu(struct thread_options *o,
+				  struct thread_options_pack *top,
+				  size_t top_sz)
 {
 	int i, j;
 
@@ -171,10 +181,21 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	o->verify_interval = le32_to_cpu(top->verify_interval);
 	o->verify_offset = le32_to_cpu(top->verify_offset);
 
-	memcpy(o->verify_pattern, top->verify_pattern, MAX_PATTERN_SIZE);
-	memcpy(o->buffer_pattern, top->buffer_pattern, MAX_PATTERN_SIZE);
-
 	o->verify_pattern_bytes = le32_to_cpu(top->verify_pattern_bytes);
+	o->buffer_pattern_bytes = le32_to_cpu(top->buffer_pattern_bytes);
+	if (o->verify_pattern_bytes >= MAX_PATTERN_SIZE ||
+	    o->buffer_pattern_bytes >= MAX_PATTERN_SIZE ||
+	    thread_options_pack_size(o) > top_sz)
+		return -EINVAL;
+
+	o->verify_pattern = realloc(o->verify_pattern,
+				    o->verify_pattern_bytes);
+	o->buffer_pattern = realloc(o->buffer_pattern,
+				    o->buffer_pattern_bytes);
+	memcpy(o->verify_pattern, top->patterns, o->verify_pattern_bytes);
+	memcpy(o->buffer_pattern, &top->patterns[o->verify_pattern_bytes],
+	       o->buffer_pattern_bytes);
+
 	o->verify_fatal = le32_to_cpu(top->verify_fatal);
 	o->verify_dump = le32_to_cpu(top->verify_dump);
 	o->verify_async = le32_to_cpu(top->verify_async);
@@ -268,7 +289,6 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	o->zero_buffers = le32_to_cpu(top->zero_buffers);
 	o->refill_buffers = le32_to_cpu(top->refill_buffers);
 	o->scramble_buffers = le32_to_cpu(top->scramble_buffers);
-	o->buffer_pattern_bytes = le32_to_cpu(top->buffer_pattern_bytes);
 	o->time_based = le32_to_cpu(top->time_based);
 	o->disable_lat = le32_to_cpu(top->disable_lat);
 	o->disable_clat = le32_to_cpu(top->disable_clat);
@@ -334,6 +354,8 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 	uint8_t log_gz_cpumask[FIO_TOP_STR_MAX];
 #endif
+
+	return 0;
 }
 
 void convert_thread_options_to_net(struct thread_options_pack *top,
@@ -572,8 +594,9 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 		top->max_latency[i] = __cpu_to_le64(o->max_latency[i]);
 	}
 
-	memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
-	memcpy(top->buffer_pattern, o->buffer_pattern, MAX_PATTERN_SIZE);
+	memcpy(top->patterns, o->verify_pattern, o->verify_pattern_bytes);
+	memcpy(&top->patterns[o->verify_pattern_bytes], o->buffer_pattern,
+	       o->buffer_pattern_bytes);
 
 	top->size = __cpu_to_le64(o->size);
 	top->io_size = __cpu_to_le64(o->io_size);
@@ -620,7 +643,6 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 	uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 	uint8_t log_gz_cpumask[FIO_TOP_STR_MAX];
 #endif
-
 }
 
 /*
@@ -630,18 +652,36 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
  */
 int fio_test_cconv(struct thread_options *__o)
 {
-	struct thread_options o;
-	struct thread_options_pack top1, top2;
-
-	memset(&top1, 0, sizeof(top1));
-	memset(&top2, 0, sizeof(top2));
-
-	convert_thread_options_to_net(&top1, __o);
-	memset(&o, 0, sizeof(o));
-	convert_thread_options_to_cpu(&o, &top1);
-	convert_thread_options_to_net(&top2, &o);
-
-	free_thread_options_to_cpu(&o);
-
-	return memcmp(&top1, &top2, sizeof(top1));
+	struct thread_options o1 = *__o, o2;
+	struct thread_options_pack *top1, *top2;
+	size_t top_sz;
+	int ret;
+
+	o1.verify_pattern_bytes = 61;
+	o1.verify_pattern = malloc(o1.verify_pattern_bytes);
+	memset(o1.verify_pattern, 'V', o1.verify_pattern_bytes);
+	o1.buffer_pattern_bytes = 15;
+	o1.buffer_pattern = malloc(o1.buffer_pattern_bytes);
+	memset(o1.buffer_pattern, 'B', o1.buffer_pattern_bytes);
+
+	top_sz = thread_options_pack_size(&o1);
+	top1 = calloc(1, top_sz);
+	top2 = calloc(1, top_sz);
+
+	convert_thread_options_to_net(top1, &o1);
+	memset(&o2, 0, sizeof(o2));
+	ret = convert_thread_options_to_cpu(&o2, top1, top_sz);
+	if (ret)
+		goto out;
+
+	convert_thread_options_to_net(top2, &o2);
+	ret = memcmp(top1, top2, top_sz);
+
+out:
+	free_thread_options_to_cpu(&o2);
+	free(top2);
+	free(top1);
+	free(o1.buffer_pattern);
+	free(o1.verify_pattern);
+	return ret;
 }
diff --git a/client.c b/client.c
index 37da74bc..51496c77 100644
--- a/client.c
+++ b/client.c
@@ -922,13 +922,20 @@ int fio_clients_send_ini(const char *filename)
 int fio_client_update_options(struct fio_client *client,
 			      struct thread_options *o, uint64_t *tag)
 {
-	struct cmd_add_job_pdu pdu;
+	size_t cmd_sz = offsetof(struct cmd_add_job_pdu, top) +
+		thread_options_pack_size(o);
+	struct cmd_add_job_pdu *pdu;
+	int ret;
 
-	pdu.thread_number = cpu_to_le32(client->thread_number);
-	pdu.groupid = cpu_to_le32(client->groupid);
-	convert_thread_options_to_net(&pdu.top, o);
+	pdu = malloc(cmd_sz);
+	pdu->thread_number = cpu_to_le32(client->thread_number);
+	pdu->groupid = cpu_to_le32(client->groupid);
+	convert_thread_options_to_net(&pdu->top, o);
 
-	return fio_net_send_cmd(client->fd, FIO_NET_CMD_UPDATE_JOB, &pdu, sizeof(pdu), tag, &client->cmd_list);
+	ret = fio_net_send_cmd(client->fd, FIO_NET_CMD_UPDATE_JOB, pdu,
+			       cmd_sz, tag, &client->cmd_list);
+	free(pdu);
+	return ret;
 }
 
 static void convert_io_stat(struct io_stat *dst, struct io_stat *src)
diff --git a/engines/libzbc.c b/engines/libzbc.c
index 2bc2c7e0..2b63ef1a 100644
--- a/engines/libzbc.c
+++ b/engines/libzbc.c
@@ -332,6 +332,39 @@ err:
 	return -ret;
 }
 
+static int libzbc_finish_zone(struct thread_data *td, struct fio_file *f,
+			      uint64_t offset, uint64_t length)
+{
+	struct libzbc_data *ld = td->io_ops_data;
+	uint64_t sector = offset >> 9;
+	unsigned int nr_zones;
+	struct zbc_errno err;
+	int i, ret;
+
+	assert(ld);
+	assert(ld->zdev);
+
+	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
+	assert(nr_zones > 0);
+
+	for (i = 0; i < nr_zones; i++, sector += td->o.zone_size >> 9) {
+		ret = zbc_finish_zone(ld->zdev, sector, 0);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	zbc_errno(ld->zdev, &err);
+	td_verror(td, errno, "zbc_finish_zone failed");
+	if (err.sk)
+		log_err("%s: finish zone failed %s:%s\n",
+			f->file_name,
+			zbc_sk_str(err.sk), zbc_asc_ascq_str(err.asc_ascq));
+	return -ret;
+}
+
 static int libzbc_get_max_open_zones(struct thread_data *td, struct fio_file *f,
 				     unsigned int *max_open_zones)
 {
@@ -434,6 +467,7 @@ FIO_STATIC struct ioengine_ops ioengine = {
 	.report_zones		= libzbc_report_zones,
 	.reset_wp		= libzbc_reset_wp,
 	.get_max_open_zones	= libzbc_get_max_open_zones,
+	.finish_zone		= libzbc_finish_zone,
 	.queue			= libzbc_queue,
 	.flags			= FIO_SYNCIO | FIO_NOEXTEND | FIO_RAWIO,
 };
diff --git a/gclient.c b/gclient.c
index c59bcfe2..73f64b3b 100644
--- a/gclient.c
+++ b/gclient.c
@@ -553,12 +553,15 @@ static void gfio_quit_op(struct fio_client *client, struct fio_net_cmd *cmd)
 }
 
 static struct thread_options *gfio_client_add_job(struct gfio_client *gc,
-			struct thread_options_pack *top)
+			struct thread_options_pack *top, size_t top_sz)
 {
 	struct gfio_client_options *gco;
 
 	gco = calloc(1, sizeof(*gco));
-	convert_thread_options_to_cpu(&gco->o, top);
+	if (convert_thread_options_to_cpu(&gco->o, top, top_sz)) {
+		dprint(FD_NET, "client: failed parsing add_job command\n");
+		return NULL;
+	}
 	INIT_FLIST_HEAD(&gco->list);
 	flist_add_tail(&gco->list, &gc->o_list);
 	gc->o_list_nr = 1;
@@ -577,7 +580,10 @@ static void gfio_add_job_op(struct fio_client *client, struct fio_net_cmd *cmd)
 
 	p->thread_number = le32_to_cpu(p->thread_number);
 	p->groupid = le32_to_cpu(p->groupid);
-	o = gfio_client_add_job(gc, &p->top);
+	o = gfio_client_add_job(gc, &p->top,
+			cmd->pdu_len - offsetof(struct cmd_add_job_pdu, top));
+	if (o == NULL)
+		return;
 
 	gdk_threads_enter();
 
diff --git a/ioengines.h b/ioengines.h
index fafa1e48..11d2115c 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -61,6 +61,8 @@ struct ioengine_ops {
 			uint64_t, uint64_t);
 	int (*get_max_open_zones)(struct thread_data *, struct fio_file *,
 				  unsigned int *);
+	int (*finish_zone)(struct thread_data *, struct fio_file *,
+			   uint64_t, uint64_t);
 	int option_struct_size;
 	struct fio_option *options;
 };
diff --git a/lib/pattern.c b/lib/pattern.c
index d8203630..9be29af6 100644
--- a/lib/pattern.c
+++ b/lib/pattern.c
@@ -32,7 +32,7 @@ static const char *parse_file(const char *beg, char *out,
 	const char *end;
 	char *file;
 	int fd;
-	ssize_t count;
+	ssize_t rc, count = 0;
 
 	if (!out_len)
 		goto err_out;
@@ -47,13 +47,32 @@ static const char *parse_file(const char *beg, char *out,
 	if (file == NULL)
 		goto err_out;
 
+#ifdef _WIN32
+	fd = open(file, O_RDONLY | O_BINARY);
+#else
 	fd = open(file, O_RDONLY);
+#endif
 	if (fd < 0)
 		goto err_free_out;
 
-	count = read(fd, out, out_len);
-	if (count == -1)
-		goto err_free_close_out;
+	if (out) {
+		while (1) {
+			rc = read(fd, out, out_len - count);
+			if (rc == 0)
+				break;
+			if (rc == -1)
+				goto err_free_close_out;
+
+			count += rc;
+			out += rc;
+		}
+	} else {
+		count = lseek(fd, 0, SEEK_END);
+		if (count == -1)
+			goto err_free_close_out;
+		if (count >= out_len)
+			count = out_len;
+	}
 
 	*filled = count;
 	close(fd);
@@ -100,7 +119,8 @@ static const char *parse_string(const char *beg, char *out,
 	if (end - beg > out_len)
 		return NULL;
 
-	memcpy(out, beg, end - beg);
+	if (out)
+		memcpy(out, beg, end - beg);
 	*filled = end - beg;
 
 	/* Catch up quote */
@@ -156,12 +176,14 @@ static const char *parse_number(const char *beg, char *out,
 		i = 0;
 		if (!lval) {
 			num    = 0;
-			out[i] = 0x00;
+			if (out)
+				out[i] = 0x00;
 			i      = 1;
 		} else {
 			val = (unsigned int)lval;
 			for (; val && out_len; out_len--, i++, val >>= 8)
-				out[i] = val & 0xff;
+				if (out)
+					out[i] = val & 0xff;
 			if (val)
 				return NULL;
 		}
@@ -183,7 +205,8 @@ static const char *parse_number(const char *beg, char *out,
 			const char *fmt;
 
 			fmt = (num & 1 ? "%1hhx" : "%2hhx");
-			sscanf(beg, fmt, &out[i]);
+			if (out)
+				sscanf(beg, fmt, &out[i]);
 			if (num & 1) {
 				num++;
 				beg--;
@@ -251,7 +274,8 @@ static const char *parse_format(const char *in, char *out, unsigned int parsed,
 	if (f->desc->len > out_len)
 		return NULL;
 
-	memset(out, '\0', f->desc->len);
+	if (out)
+		memset(out, '\0', f->desc->len);
 	*filled = f->desc->len;
 
 	return in + len;
@@ -262,7 +286,9 @@ static const char *parse_format(const char *in, char *out, unsigned int parsed,
  *                            numbers and pattern formats.
  * @in - string input
  * @in_len - size of the input string
- * @out - output buffer where parsed result will be put
+ * @out - output buffer where parsed result will be put, may be NULL
+ *	  in which case this function just calculates the required
+ *	  length of the buffer
  * @out_len - lengths of the output buffer
  * @fmt_desc - array of pattern format descriptors [input]
  * @fmt - array of pattern formats [output]
@@ -305,16 +331,16 @@ static const char *parse_format(const char *in, char *out, unsigned int parsed,
  *
  * Returns number of bytes filled or err < 0 in case of failure.
  */
-int parse_and_fill_pattern(const char *in, unsigned int in_len,
-			   char *out, unsigned int out_len,
-			   const struct pattern_fmt_desc *fmt_desc,
-			   struct pattern_fmt *fmt,
-			   unsigned int *fmt_sz_out)
+static int parse_and_fill_pattern(const char *in, unsigned int in_len,
+				  char *out, unsigned int out_len,
+				  const struct pattern_fmt_desc *fmt_desc,
+				  struct pattern_fmt *fmt,
+				  unsigned int *fmt_sz_out)
 {
 	const char *beg, *end, *out_beg = out;
 	unsigned int total = 0, fmt_rem = 0;
 
-	if (!in || !in_len || !out || !out_len)
+	if (!in || !in_len || !out_len)
 		return -EINVAL;
 	if (fmt_sz_out)
 		fmt_rem = *fmt_sz_out;
@@ -370,6 +396,48 @@ int parse_and_fill_pattern(const char *in, unsigned int in_len,
 	return total;
 }
 
+/**
+ * parse_and_fill_pattern_alloc() - Parses combined input, which consists of
+ *				    strings, numbers and pattern formats and
+ *				    allocates a buffer for the result.
+ *
+ * @in - string input
+ * @in_len - size of the input string
+ * @out - pointer to the output buffer pointer, this will be set to the newly
+ *        allocated pattern buffer which must be freed by the caller
+ * @fmt_desc - array of pattern format descriptors [input]
+ * @fmt - array of pattern formats [output]
+ * @fmt_sz_out - pointer where the size of the pattern formats array is stored
+ *           [input]; after successful parsing this pointer will contain the
+ *           number of parsed formats, if any [output].
+ *
+ * See documentation on parse_and_fill_pattern() above for a description
+ * of the functionality.
+ *
+ * Returns number of bytes filled or err < 0 in case of failure.
+ */
+int parse_and_fill_pattern_alloc(const char *in, unsigned int in_len,
+		char **out, const struct pattern_fmt_desc *fmt_desc,
+		struct pattern_fmt *fmt, unsigned int *fmt_sz_out)
+{
+	int count;
+
+	count = parse_and_fill_pattern(in, in_len, NULL, MAX_PATTERN_SIZE,
+				       fmt_desc, fmt, fmt_sz_out);
+	if (count < 0)
+		return count;
+
+	*out = malloc(count);
+	count = parse_and_fill_pattern(in, in_len, *out, count, fmt_desc,
+				       fmt, fmt_sz_out);
+	if (count < 0) {
+		free(*out);
+		*out = NULL;
+	}
+
+	return count;
+}
+
 /**
  * dup_pattern() - Duplicates part of the pattern all over the buffer.
  *
diff --git a/lib/pattern.h b/lib/pattern.h
index a6d9d6b4..7123b42d 100644
--- a/lib/pattern.h
+++ b/lib/pattern.h
@@ -1,6 +1,19 @@
 #ifndef FIO_PARSE_PATTERN_H
 #define FIO_PARSE_PATTERN_H
 
+/*
+ * The pattern is dynamically allocated, but that doesn't mean there
+ * are no limits. The network protocol has a limit of
+ * FIO_SERVER_MAX_CMD_MB and potentially two patterns must fit in there.
+ * There's also a need to verify the incoming data from the network and
+ * this provides a sensible check.
+ *
+ * 128MiB is an arbitrary limit that meets these criteria. The patterns
+ * tend to be truncated at the IO size anyway and IO sizes that large
+ * aren't terribly practical.
+ */
+#define MAX_PATTERN_SIZE	(128 << 20)
+
 /**
  * Pattern format description. The input for 'parse_pattern'.
  * Describes format with its name and callback, which should
@@ -21,11 +34,9 @@ struct pattern_fmt {
 	const struct pattern_fmt_desc *desc;
 };
 
-int parse_and_fill_pattern(const char *in, unsigned int in_len,
-			   char *out, unsigned int out_len,
-			   const struct pattern_fmt_desc *fmt_desc,
-			   struct pattern_fmt *fmt,
-			   unsigned int *fmt_sz_out);
+int parse_and_fill_pattern_alloc(const char *in, unsigned int in_len,
+		char **out, const struct pattern_fmt_desc *fmt_desc,
+		struct pattern_fmt *fmt, unsigned int *fmt_sz_out);
 
 int paste_format_inplace(char *pattern, unsigned int pattern_len,
 			 struct pattern_fmt *fmt, unsigned int fmt_sz,
diff --git a/options.c b/options.c
index 9e4d8cd1..49612345 100644
--- a/options.c
+++ b/options.c
@@ -1488,8 +1488,8 @@ static int str_buffer_pattern_cb(void *data, const char *input)
 	int ret;
 
 	/* FIXME: for now buffer pattern does not support formats */
-	ret = parse_and_fill_pattern(input, strlen(input), td->o.buffer_pattern,
-				     MAX_PATTERN_SIZE, NULL, NULL, NULL);
+	ret = parse_and_fill_pattern_alloc(input, strlen(input),
+				&td->o.buffer_pattern, NULL, NULL, NULL);
 	if (ret < 0)
 		return 1;
 
@@ -1537,9 +1537,9 @@ static int str_verify_pattern_cb(void *data, const char *input)
 	int ret;
 
 	td->o.verify_fmt_sz = FIO_ARRAY_SIZE(td->o.verify_fmt);
-	ret = parse_and_fill_pattern(input, strlen(input), td->o.verify_pattern,
-				     MAX_PATTERN_SIZE, fmt_desc,
-				     td->o.verify_fmt, &td->o.verify_fmt_sz);
+	ret = parse_and_fill_pattern_alloc(input, strlen(input),
+			&td->o.verify_pattern, fmt_desc, td->o.verify_fmt,
+			&td->o.verify_fmt_sz);
 	if (ret < 0)
 		return 1;
 
diff --git a/oslib/blkzoned.h b/oslib/blkzoned.h
index 719b041d..29fb034f 100644
--- a/oslib/blkzoned.h
+++ b/oslib/blkzoned.h
@@ -18,6 +18,8 @@ extern int blkzoned_reset_wp(struct thread_data *td, struct fio_file *f,
 				uint64_t offset, uint64_t length);
 extern int blkzoned_get_max_open_zones(struct thread_data *td, struct fio_file *f,
 				       unsigned int *max_open_zones);
+extern int blkzoned_finish_zone(struct thread_data *td, struct fio_file *f,
+				uint64_t offset, uint64_t length);
 #else
 /*
  * Define stubs for systems that do not have zoned block device support.
@@ -51,6 +53,12 @@ static inline int blkzoned_get_max_open_zones(struct thread_data *td, struct fio
 {
 	return -EIO;
 }
+static inline int blkzoned_finish_zone(struct thread_data *td,
+				       struct fio_file *f,
+				       uint64_t offset, uint64_t length)
+{
+	return -EIO;
+}
 #endif
 
 #endif /* FIO_BLKZONED_H */
diff --git a/oslib/linux-blkzoned.c b/oslib/linux-blkzoned.c
index 185bd501..c3130d0e 100644
--- a/oslib/linux-blkzoned.c
+++ b/oslib/linux-blkzoned.c
@@ -308,3 +308,40 @@ int blkzoned_reset_wp(struct thread_data *td, struct fio_file *f,
 
 	return ret;
 }
+
+int blkzoned_finish_zone(struct thread_data *td, struct fio_file *f,
+			 uint64_t offset, uint64_t length)
+{
+#ifdef BLKFINISHZONE
+	struct blk_zone_range zr = {
+		.sector         = offset >> 9,
+		.nr_sectors     = length >> 9,
+	};
+	int fd, ret = 0;
+
+	/* If the file is not yet opened, open it for this function. */
+	fd = f->fd;
+	if (fd < 0) {
+		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
+		if (fd < 0)
+			return -errno;
+	}
+
+	if (ioctl(fd, BLKFINISHZONE, &zr) < 0)
+		ret = -errno;
+
+	if (f->fd < 0)
+		close(fd);
+
+	return ret;
+#else
+	/*
+	 * Kernel versions older than 5.5 do not support BLKFINISHZONE. Those
+	 * old kernels assumed zones are closed automatically at the
+	 * max_open_zones limit and did not support a max_active_zones limit,
+	 * so there was no need to finish zones to avoid errors caused by either
+	 * limit. For those old versions, just do nothing.
+	 */
+	return 0;
+#endif
+}
diff --git a/server.c b/server.c
index b869d387..a6347efd 100644
--- a/server.c
+++ b/server.c
@@ -1082,6 +1082,7 @@ static int handle_update_job_cmd(struct fio_net_cmd *cmd)
 	struct cmd_add_job_pdu *pdu = (struct cmd_add_job_pdu *) cmd->payload;
 	struct thread_data *td;
 	uint32_t tnumber;
+	int ret;
 
 	tnumber = le32_to_cpu(pdu->thread_number);
 
@@ -1093,8 +1094,9 @@ static int handle_update_job_cmd(struct fio_net_cmd *cmd)
 	}
 
 	td = tnumber_to_td(tnumber);
-	convert_thread_options_to_cpu(&td->o, &pdu->top);
-	send_update_job_reply(cmd->tag, 0);
+	ret = convert_thread_options_to_cpu(&td->o, &pdu->top,
+			cmd->pdu_len - offsetof(struct cmd_add_job_pdu, top));
+	send_update_job_reply(cmd->tag, ret);
 	return 0;
 }
 
@@ -2323,15 +2325,18 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
 
 void fio_server_send_add_job(struct thread_data *td)
 {
-	struct cmd_add_job_pdu pdu = {
-		.thread_number = cpu_to_le32(td->thread_number),
-		.groupid = cpu_to_le32(td->groupid),
-	};
+	struct cmd_add_job_pdu *pdu;
+	size_t cmd_sz = offsetof(struct cmd_add_job_pdu, top) +
+		thread_options_pack_size(&td->o);
 
-	convert_thread_options_to_net(&pdu.top, &td->o);
+	pdu = malloc(cmd_sz);
+	pdu->thread_number = cpu_to_le32(td->thread_number);
+	pdu->groupid = cpu_to_le32(td->groupid);
 
-	fio_net_queue_cmd(FIO_NET_CMD_ADD_JOB, &pdu, sizeof(pdu), NULL,
-				SK_F_COPY);
+	convert_thread_options_to_net(&pdu->top, &td->o);
+
+	fio_net_queue_cmd(FIO_NET_CMD_ADD_JOB, pdu, cmd_sz, NULL, SK_F_COPY);
+	free(pdu);
 }
 
 void fio_server_send_start(struct thread_data *td)
diff --git a/server.h b/server.h
index b0c5e2df..28133020 100644
--- a/server.h
+++ b/server.h
@@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-	FIO_SERVER_VER			= 97,
+	FIO_SERVER_VER			= 98,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
diff --git a/stat.h b/stat.h
index 4c3bf71f..8ceabc48 100644
--- a/stat.h
+++ b/stat.h
@@ -142,7 +142,6 @@ enum block_info_state {
 	BLOCK_STATE_COUNT,
 };
 
-#define MAX_PATTERN_SIZE	512
 #define FIO_JOBNAME_SIZE	128
 #define FIO_JOBDESC_SIZE	256
 #define FIO_VERROR_SIZE		128
diff --git a/t/jobs/t0027.fio b/t/jobs/t0027.fio
new file mode 100644
index 00000000..b5b97a30
--- /dev/null
+++ b/t/jobs/t0027.fio
@@ -0,0 +1,14 @@
+[global]
+filename=t0027file
+size=16k
+bs=16k
+
+[write_job]
+readwrite=write
+buffer_pattern='t0027.pattern'
+
+[read_job]
+stonewall=1
+readwrite=read
+verify=pattern
+verify_pattern='t0027.pattern'
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index e5b307ac..a06f8126 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -799,6 +799,26 @@ class FioJobTest_t0025(FioJobTest):
         if self.json_data['jobs'][0]['read']['io_kbytes'] != 128:
             self.passed = False
 
+class FioJobTest_t0027(FioJobTest):
+    def setup(self, *args, **kws):
+        super(FioJobTest_t0027, self).setup(*args, **kws)
+        self.pattern_file = os.path.join(self.test_dir, "t0027.pattern")
+        self.output_file = os.path.join(self.test_dir, "t0027file")
+        self.pattern = os.urandom(16 << 10)
+        with open(self.pattern_file, "wb") as f:
+            f.write(self.pattern)
+
+    def check_result(self):
+        super(FioJobTest_t0027, self).check_result()
+
+        if not self.passed:
+            return
+
+        with open(self.output_file, "rb") as f:
+            data = f.read()
+
+        if data != self.pattern:
+            self.passed = False
 
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
@@ -1214,6 +1234,15 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [Requirements.not_windows],
     },
+    {
+        'test_id':          27,
+        'test_class':       FioJobTest_t0027,
+        'job':              't0027.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,
diff --git a/t/zbd/test-zbd-support b/t/zbd/test-zbd-support
index cdc03f28..4091d9ac 100755
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -813,7 +813,8 @@ test33() {
     local bs io_size size
     local off capacity=0;
 
-    prep_write
+    [ -n "$is_zbd" ] && reset_zone "$dev" -1
+
     off=$((first_sequential_zone_sector * 512))
     capacity=$(total_zone_capacity 1 $off $dev)
     size=$((2 * zone_size))
@@ -822,20 +823,30 @@ test33() {
     run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write	\
 		   --size=$size --io_size=$io_size --bs=$bs	\
 		   >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $(((io_size + bs - 1) / bs * bs)) || return $?
+    check_written $((io_size / bs * bs)) || return $?
 }
 
-# Write to sequential zones with a block size that is not a divisor of the
-# zone size and with data verification enabled.
+# Test repeated async write job with verify using two unaligned block sizes.
 test34() {
-    local size
+	local bs off zone_capacity
+	local -a block_sizes
 
-    prep_write
-    size=$((2 * zone_size))
-    run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write --size=$size \
-		   --do_verify=1 --verify=md5 --bs=$((3 * zone_size / 4)) \
-		   >> "${logfile}.${test_number}" 2>&1 && return 1
-    grep -q 'not a divisor of' "${logfile}.${test_number}"
+	require_zbd || return $SKIP_TESTCASE
+	prep_write
+
+	off=$((first_sequential_zone_sector * 512))
+	zone_capacity=$(total_zone_capacity 1 $off $dev)
+	block_sizes=($((4096 * 7)) $(($(min ${zone_capacity} 4194304) - 4096)))
+
+	for bs in ${block_sizes[@]}; do
+		run_fio --name=job --filename="${dev}" --rw=randwrite \
+			--bs="${bs}" --offset="${off}" \
+			--size=$((4 * zone_size)) --iodepth=256 \
+			"$(ioengine "libaio")" --time_based=1 --runtime=15s \
+			--zonemode=zbd --direct=1 --zonesize="${zone_size}" \
+			--verify=crc32c --do_verify=1 ${job_var_opts[@]} \
+			>> "${logfile}.${test_number}" 2>&1 || return $?
+	done
 }
 
 # Test 1/4 for the I/O boundary rounding code: $size < $zone_size.
@@ -1171,7 +1182,6 @@ test54() {
 		--rw=randrw:2 --rwmixwrite=25 --bsrange=4k-${zone_size} \
 		--zonemode=zbd --zonesize=${zone_size} \
 		--verify=crc32c --do_verify=1 --verify_backlog=2 \
-		--experimental_verify=1 \
 		--alloc-size=65536 --random_generator=tausworthe64 \
 		${job_var_opts[@]} --debug=zbd \
 		>> "${logfile}.${test_number}" 2>&1 || return $?
@@ -1269,6 +1279,32 @@ test58() {
 	    >>"${logfile}.${test_number}" 2>&1
 }
 
+# Test zone_reset_threshold with verify.
+test59() {
+	local off bs loops=2 size=$((zone_size)) w
+	local -a workloads=(write randwrite rw randrw)
+
+	prep_write
+	off=$((first_sequential_zone_sector * 512))
+
+	bs=$(min $((256*1024)) "$zone_size")
+	for w in "${workloads[@]}"; do
+		run_fio_on_seq "$(ioengine "psync")" --rw=${w} --bs="$bs" \
+			       --size=$size --loops=$loops --do_verify=1 \
+			       --verify=md5 --zone_reset_frequency=.9 \
+			       --zone_reset_threshold=.1 \
+			       >> "${logfile}.${test_number}" 2>&1 || return $?
+	done
+}
+
+# Test that fio errors out on the experimental_verify option with zonemode=zbd.
+test60() {
+	run_fio_on_seq "$(ioengine "psync")" --rw=write --size=$zone_size \
+		       --do_verify=1 --verify=md5 --experimental_verify=1 \
+		       >> "${logfile}.${test_number}" 2>&1 && return 1
+	grep -q 'not support experimental verify' "${logfile}.${test_number}"
+}
+
 SECONDS=0
 tests=()
 dynamic_analyzer=()
diff --git a/thread_options.h b/thread_options.h
index 634070af..74e7ea45 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -144,7 +144,7 @@ struct thread_options {
 	unsigned int do_verify;
 	unsigned int verify_interval;
 	unsigned int verify_offset;
-	char verify_pattern[MAX_PATTERN_SIZE];
+	char *verify_pattern;
 	unsigned int verify_pattern_bytes;
 	struct pattern_fmt verify_fmt[8];
 	unsigned int verify_fmt_sz;
@@ -256,7 +256,7 @@ struct thread_options {
 	unsigned int zero_buffers;
 	unsigned int refill_buffers;
 	unsigned int scramble_buffers;
-	char buffer_pattern[MAX_PATTERN_SIZE];
+	char *buffer_pattern;
 	unsigned int buffer_pattern_bytes;
 	unsigned int compress_percentage;
 	unsigned int compress_chunk;
@@ -464,7 +464,6 @@ struct thread_options_pack {
 	uint32_t do_verify;
 	uint32_t verify_interval;
 	uint32_t verify_offset;
-	uint8_t verify_pattern[MAX_PATTERN_SIZE];
 	uint32_t verify_pattern_bytes;
 	uint32_t verify_fatal;
 	uint32_t verify_dump;
@@ -572,7 +571,6 @@ struct thread_options_pack {
 	uint32_t zero_buffers;
 	uint32_t refill_buffers;
 	uint32_t scramble_buffers;
-	uint8_t buffer_pattern[MAX_PATTERN_SIZE];
 	uint32_t buffer_pattern_bytes;
 	uint32_t compress_percentage;
 	uint32_t compress_chunk;
@@ -699,9 +697,16 @@ struct thread_options_pack {
 
 	uint32_t log_entries;
 	uint32_t log_prio;
+
+	/*
+	 * verify_pattern followed by buffer_pattern from the unpacked struct
+	 */
+	uint8_t patterns[];
 } __attribute__((packed));
 
-extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);
+extern int convert_thread_options_to_cpu(struct thread_options *o,
+		struct thread_options_pack *top, size_t top_sz);
+extern size_t thread_options_pack_size(struct thread_options *o);
 extern void convert_thread_options_to_net(struct thread_options_pack *top, struct thread_options *);
 extern int fio_test_cconv(struct thread_options *);
 extern void options_default_fill(struct thread_options *o);
diff --git a/verify.c b/verify.c
index d6a229ca..ddfadcc8 100644
--- a/verify.c
+++ b/verify.c
@@ -917,9 +917,11 @@ int verify_io_u(struct thread_data *td, struct io_u **io_u_ptr)
 		hdr = p;
 
 		/*
-		 * Make rand_seed check pass when have verify_backlog.
+		 * Make the rand_seed check pass when verify_backlog is used or
+		 * a zone reset frequency is set for zonemode=zbd.
 		 */
-		if (!td_rw(td) || (td->flags & TD_F_VER_BACKLOG))
+		if (!td_rw(td) || (td->flags & TD_F_VER_BACKLOG) ||
+		    td->o.zrf.u.f)
 			io_u->rand_seed = hdr->rand_seed;
 
 		if (td->o.verify != VERIFY_PATTERN_NO_HDR) {
diff --git a/zbd.c b/zbd.c
index 627fb968..d1e469f6 100644
--- a/zbd.c
+++ b/zbd.c
@@ -70,6 +70,19 @@ static inline uint64_t zbd_zone_capacity_end(const struct fio_zone_info *z)
 	return z->start + z->capacity;
 }
 
+/**
+ * zbd_zone_remainder - Return the number of bytes that are still available for
+ *                      writing before the zone gets full
+ * @z: zone info pointer.
+ */
+static inline uint64_t zbd_zone_remainder(struct fio_zone_info *z)
+{
+	if (z->wp >= zbd_zone_capacity_end(z))
+		return 0;
+
+	return zbd_zone_capacity_end(z) - z->wp;
+}
+
 /**
  * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
  * @f: file pointer.
@@ -83,8 +96,7 @@ static bool zbd_zone_full(const struct fio_file *f, struct fio_zone_info *z,
 {
 	assert((required & 511) == 0);
 
-	return z->has_wp &&
-		z->wp + required > zbd_zone_capacity_end(z);
+	return z->has_wp && required > zbd_zone_remainder(z);
 }
 
 static void zone_lock(struct thread_data *td, const struct fio_file *f,
@@ -279,7 +291,6 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
 	pthread_mutex_unlock(&f->zbd_info->mutex);
 
 	z->wp = z->start;
-	z->verify_block = 0;
 
 	td->ts.nr_zone_resets++;
 
@@ -322,6 +333,44 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
 	z->open = 0;
 }
 
+/**
+ * zbd_finish_zone - finish the specified zone
+ * @td: FIO thread data.
+ * @f: FIO file for which to finish a zone
+ * @z: Zone to finish.
+ *
+ * Finish the zone @z that is in open or closed status.
+ */
+static int zbd_finish_zone(struct thread_data *td, struct fio_file *f,
+			   struct fio_zone_info *z)
+{
+	uint64_t offset = z->start;
+	uint64_t length = f->zbd_info->zone_size;
+	int ret = 0;
+
+	switch (f->zbd_info->model) {
+	case ZBD_HOST_AWARE:
+	case ZBD_HOST_MANAGED:
+		if (td->io_ops && td->io_ops->finish_zone)
+			ret = td->io_ops->finish_zone(td, f, offset, length);
+		else
+			ret = blkzoned_finish_zone(td, f, offset, length);
+		break;
+	default:
+		break;
+	}
+
+	if (ret < 0) {
+		td_verror(td, errno, "finish zone failed");
+		log_err("%s: finish zone at sector %"PRIu64" failed (%d).\n",
+			f->file_name, offset >> 9, errno);
+	} else {
+		z->wp = (z+1)->start;
+	}
+
+	return ret;
+}
+
 /**
  * zbd_reset_zones - Reset a range of zones.
  * @td: fio thread data.
@@ -440,7 +489,7 @@ static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
 		 * already in-flight, handle it as a full zone instead of an
 		 * open zone.
 		 */
-		if (z->wp >= zbd_zone_capacity_end(z))
+		if (!zbd_zone_remainder(z))
 			res = false;
 		goto out;
 	}
@@ -602,7 +651,7 @@ static bool zbd_verify_bs(void)
 {
 	struct thread_data *td;
 	struct fio_file *f;
-	int i, j, k;
+	int i, j;
 
 	for_each_td(td, i) {
 		if (td_trim(td) &&
@@ -624,15 +673,6 @@ static bool zbd_verify_bs(void)
 					 zone_size);
 				return false;
 			}
-			for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); k++) {
-				if (td->o.verify != VERIFY_NONE &&
-				    zone_size % td->o.bs[k] != 0) {
-					log_info("%s: block size %llu is not a divisor of the zone size %"PRIu64"\n",
-						 f->file_name, td->o.bs[k],
-						 zone_size);
-					return false;
-				}
-			}
 		}
 	}
 	return true;
@@ -1044,6 +1084,11 @@ int zbd_setup_files(struct thread_data *td)
 	if (!zbd_verify_bs())
 		return 1;
 
+	if (td->o.experimental_verify) {
+		log_err("zonemode=zbd does not support experimental verify\n");
+		return 1;
+	}
+
 	for_each_file(td, f, i) {
 		struct zoned_block_device_info *zbd = f->zbd_info;
 		struct fio_zone_info *z;
@@ -1208,6 +1253,7 @@ void zbd_file_reset(struct thread_data *td, struct fio_file *f)
 {
 	struct fio_zone_info *zb, *ze;
 	uint64_t swd;
+	bool verify_data_left = false;
 
 	if (!f->zbd_info || !td_write(td))
 		return;
@@ -1224,8 +1270,16 @@ void zbd_file_reset(struct thread_data *td, struct fio_file *f)
 	 * writing any data to avoid that a zone reset has to be issued while
 	 * writing data, which causes data loss.
 	 */
-	if (td->o.verify != VERIFY_NONE && td->runstate != TD_VERIFYING)
-		zbd_reset_zones(td, f, zb, ze);
+	if (td->o.verify != VERIFY_NONE) {
+		verify_data_left = td->runstate == TD_VERIFYING ||
+			td->io_hist_len || td->verify_batch;
+		if (td->io_hist_len && td->o.verify_backlog)
+			verify_data_left =
+				td->io_hist_len % td->o.verify_backlog;
+		if (!verify_data_left)
+			zbd_reset_zones(td, f, zb, ze);
+	}
+
 	zbd_reset_write_cnt(td, f);
 }
 
@@ -1368,7 +1422,7 @@ found_candidate_zone:
 	/* Both z->mutex and zbdi->mutex are held. */
 
 examine_zone:
-	if (z->wp + min_bs <= zbd_zone_capacity_end(z)) {
+	if (zbd_zone_remainder(z) >= min_bs) {
 		pthread_mutex_unlock(&zbdi->mutex);
 		goto out;
 	}
@@ -1433,7 +1487,7 @@ retry:
 		z = zbd_get_zone(f, zone_idx);
 
 		zone_lock(td, f, z);
-		if (z->wp + min_bs <= zbd_zone_capacity_end(z))
+		if (zbd_zone_remainder(z) >= min_bs)
 			goto out;
 		pthread_mutex_lock(&zbdi->mutex);
 	}
@@ -1476,42 +1530,6 @@ out:
 	return z;
 }
 
-/* The caller must hold z->mutex. */
-static struct fio_zone_info *zbd_replay_write_order(struct thread_data *td,
-						    struct io_u *io_u,
-						    struct fio_zone_info *z)
-{
-	const struct fio_file *f = io_u->file;
-	const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
-
-	if (!zbd_open_zone(td, f, z)) {
-		zone_unlock(z);
-		z = zbd_convert_to_open_zone(td, io_u);
-		assert(z);
-	}
-
-	if (z->verify_block * min_bs >= z->capacity) {
-		log_err("%s: %d * %"PRIu64" >= %"PRIu64"\n",
-			f->file_name, z->verify_block, min_bs, z->capacity);
-		/*
-		 * If the assertion below fails during a test run, adding
-		 * "--experimental_verify=1" to the command line may help.
-		 */
-		assert(false);
-	}
-
-	io_u->offset = z->start + z->verify_block * min_bs;
-	if (io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
-		log_err("%s: %llu + %llu >= %"PRIu64"\n",
-			f->file_name, io_u->offset, io_u->buflen,
-			zbd_zone_capacity_end(z));
-		assert(false);
-	}
-	z->verify_block += io_u->buflen / min_bs;
-
-	return z;
-}
-
 /*
  * Find another zone which has @min_bytes of readable data. Search in zones
  * @zb + 1 .. @zl. For random workload, also search in zones @zb - 1 .. @zf.
@@ -1862,10 +1880,8 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 
 	switch (io_u->ddir) {
 	case DDIR_READ:
-		if (td->runstate == TD_VERIFYING && td_write(td)) {
-			zb = zbd_replay_write_order(td, io_u, zb);
+		if (td->runstate == TD_VERIFYING && td_write(td))
 			goto accept;
-		}
 
 		/*
 		 * Check that there is enough written data in the zone to do an
@@ -1941,6 +1957,33 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			goto eof;
 		}
 
+retry:
+		if (zbd_zone_remainder(zb) > 0 &&
+		    zbd_zone_remainder(zb) < min_bs) {
+			pthread_mutex_lock(&f->zbd_info->mutex);
+			zbd_close_zone(td, f, zb);
+			pthread_mutex_unlock(&f->zbd_info->mutex);
+			dprint(FD_ZBD,
+			       "%s: finish zone %d\n",
+			       f->file_name, zbd_zone_idx(f, zb));
+			io_u_quiesce(td);
+			zbd_finish_zone(td, f, zb);
+			if (zbd_zone_idx(f, zb) + 1 >= f->max_zone) {
+				if (!td_random(td))
+					goto eof;
+			}
+			zone_unlock(zb);
+
+			/* Find the next write pointer zone */
+			do {
+				zb++;
+				if (zbd_zone_idx(f, zb) >= f->max_zone)
+					zb = zbd_get_zone(f, f->min_zone);
+			} while (!zb->has_wp);
+
+			zone_lock(td, f, zb);
+		}
+
 		if (!zbd_open_zone(td, f, zb)) {
 			zone_unlock(zb);
 			zb = zbd_convert_to_open_zone(td, io_u);
@@ -1951,6 +1994,10 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			}
 		}
 
+		if (zbd_zone_remainder(zb) > 0 &&
+		    zbd_zone_remainder(zb) < min_bs)
+			goto retry;
+
 		/* Check whether the zone reset threshold has been exceeded */
 		if (td->o.zrf.u.f) {
 			if (zbdi->wp_sectors_with_data >= f->io_size * td->o.zrt.u.f &&
@@ -1960,7 +2007,19 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 
 		/* Reset the zone pointer if necessary */
 		if (zb->reset_zone || zbd_zone_full(f, zb, min_bs)) {
-			assert(td->o.verify == VERIFY_NONE);
+			if (td->o.verify != VERIFY_NONE) {
+				/*
+				 * Unset io_u->file to tell get_next_verify()
+				 * that this IO is not a requeue.
+				 */
+				io_u->file = NULL;
+				if (!get_next_verify(td, io_u)) {
+					zone_unlock(zb);
+					return io_u_accept;
+				}
+				io_u->file = f;
+			}
+
 			/*
 			 * Since previous write requests may have been submitted
 			 * asynchronously and since we will submit the zone
diff --git a/zbd.h b/zbd.h
index 0a73b41d..d425707e 100644
--- a/zbd.h
+++ b/zbd.h
@@ -25,7 +25,6 @@ enum io_u_action {
  * @start: zone start location (bytes)
  * @wp: zone write pointer location (bytes)
  * @capacity: maximum size usable from the start of a zone (bytes)
- * @verify_block: number of blocks that have been verified for this zone
  * @mutex: protects the modifiable members in this structure
  * @type: zone type (BLK_ZONE_TYPE_*)
  * @cond: zone state (BLK_ZONE_COND_*)
@@ -39,7 +38,6 @@ struct fio_zone_info {
 	uint64_t		start;
 	uint64_t		wp;
 	uint64_t		capacity;
-	uint32_t		verify_block;
 	enum zbd_zone_type	type:2;
 	enum zbd_zone_cond	cond:4;
 	unsigned int		has_wp:1;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-15 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-15 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 2d92b09513b3c11a04541298aece35eae3dbc963:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-11-07 16:20:04 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 07c8fe21021681f86fbfd3c3d63b88a5ebd4e557:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-11-14 08:47:00 -0500)

----------------------------------------------------------------
Bart Van Assche (3):
      configure: Fix clock_gettime() detection
      configure: Fix the struct nvme_uring_cmd detection
      os/os.h: Improve cpus_configured()

Vincent Fu (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 configure | 10 ++++++----
 os/os.h   |  4 +++-
 2 files changed, 9 insertions(+), 5 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 30bf5acb..1b12d268 100755
--- a/configure
+++ b/configure
@@ -1172,7 +1172,9 @@ cat > $TMPC << EOF
 #include <time.h>
 int main(int argc, char **argv)
 {
-  return clock_gettime(0, NULL);
+  struct timespec ts;
+
+  return clock_gettime(0, &ts);
 }
 EOF
 if compile_prog "" "" "clock_gettime"; then
@@ -1194,7 +1196,9 @@ if test "$clock_gettime" = "yes" ; then
 #include <time.h>
 int main(int argc, char **argv)
 {
-  return clock_gettime(CLOCK_MONOTONIC, NULL);
+  struct timespec ts;
+
+  return clock_gettime(CLOCK_MONOTONIC, &ts);
 }
 EOF
   if compile_prog "" "$LIBS" "clock monotonic"; then
@@ -2634,8 +2638,6 @@ cat > $TMPC << EOF
 #include <linux/nvme_ioctl.h>
 int main(void)
 {
-  struct nvme_uring_cmd *cmd;
-
   return sizeof(struct nvme_uring_cmd);
 }
 EOF
diff --git a/os/os.h b/os/os.h
index a6fde1fd..c428260c 100644
--- a/os/os.h
+++ b/os/os.h
@@ -355,7 +355,9 @@ static inline unsigned long long get_fs_free_size(const char *path)
 #ifndef FIO_HAVE_CPU_CONF_SYSCONF
 static inline unsigned int cpus_configured(void)
 {
-	return sysconf(_SC_NPROCESSORS_CONF);
+	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	return nr_cpus >= 1 ? nr_cpus : 1;
 }
 #endif
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-08 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-08 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 72bcaffd7d56d4c2ebad6d0a1e465e0e9db8be40:

  Fio 3.33 (2022-11-06 13:55:41 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 2d92b09513b3c11a04541298aece35eae3dbc963:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-11-07 16:20:04 -0700)

----------------------------------------------------------------
Bart Van Assche (2):
      Windows: Fix the build
      Android: Enable zoned block device support

Jens Axboe (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 configure                         |  2 +-
 os/windows/dlls.c                 | 16 +++++++++++-----
 os/windows/posix/include/syslog.h |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 24c599a8..30bf5acb 100755
--- a/configure
+++ b/configure
@@ -2561,7 +2561,7 @@ if compile_prog "" "" "valgrind_dev"; then
 fi
 print_config "Valgrind headers" "$valgrind_dev"
 
-if test "$targetos" = "Linux" ; then
+if test "$targetos" = "Linux" || test "$targetos" = "Android"; then
 ##########################################
 # <linux/blkzoned.h> probe
 if test "$linux_blkzoned" != "yes" ; then
diff --git a/os/windows/dlls.c b/os/windows/dlls.c
index 774b1c61..ffedfa1e 100644
--- a/os/windows/dlls.c
+++ b/os/windows/dlls.c
@@ -11,12 +11,18 @@ void os_clk_tck(long *clk_tck)
 	 */
 	unsigned long minRes, maxRes, curRes;
 	HMODULE lib;
-	FARPROC queryTimer;
-	FARPROC setTimer;
+	NTSTATUS NTAPI (*queryTimer)
+		(OUT PULONG              MinimumResolution,
+		 OUT PULONG              MaximumResolution,
+		 OUT PULONG              CurrentResolution);
+	NTSTATUS NTAPI (*setTimer)
+		(IN ULONG                DesiredResolution,
+		 IN BOOLEAN              SetResolution,
+		 OUT PULONG              CurrentResolution);
 
 	if (!(lib = LoadLibrary(TEXT("ntdll.dll"))) ||
-		!(queryTimer = GetProcAddress(lib, "NtQueryTimerResolution")) ||
-		!(setTimer = GetProcAddress(lib, "NtSetTimerResolution"))) {
+		!(queryTimer = (void *)GetProcAddress(lib, "NtQueryTimerResolution")) ||
+		!(setTimer = (void *)GetProcAddress(lib, "NtSetTimerResolution"))) {
 		dprint(FD_HELPERTHREAD, 
 			"Failed to load ntdll library, set to lower bound 64 Hz\n");
 		*clk_tck = 64;
@@ -30,4 +36,4 @@ void os_clk_tck(long *clk_tck)
 		setTimer(maxRes, 1, &curRes);
 		*clk_tck = (long) (10000000L / maxRes);
 	}
-}
\ No newline at end of file
+}
diff --git a/os/windows/posix/include/syslog.h b/os/windows/posix/include/syslog.h
index b8582e95..03a04f69 100644
--- a/os/windows/posix/include/syslog.h
+++ b/os/windows/posix/include/syslog.h
@@ -1,7 +1,7 @@
 #ifndef SYSLOG_H
 #define SYSLOG_H
 
-int syslog();
+int syslog(int priority, const char *format, ...);
 
 #define LOG_INFO	0x1
 #define LOG_ERROR	0x2

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-07 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-07 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 02ee8a1ba7ea798f03fb029f589382b6f799be24:

  test: use homebrew to install sphinx instead of pip on macOS (2022-11-04 13:50:31 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 72bcaffd7d56d4c2ebad6d0a1e465e0e9db8be40:

  Fio 3.33 (2022-11-06 13:55:41 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      Fio 3.33

 FIO-VERSION-GEN | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index db073818..5a0822c9 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.32
+DEF_VER=fio-3.33
 
 LF='
 '

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-05 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-05 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 7fc3a553beadd15cac09b1514547c4d382d292d9:

  HOWTO: clean up exit_what description (2022-11-02 10:26:36 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 02ee8a1ba7ea798f03fb029f589382b6f799be24:

  test: use homebrew to install sphinx instead of pip on macOS (2022-11-04 13:50:31 -0400)

----------------------------------------------------------------
Ankit Kumar (1):
      io_uring: update documentation and small fix for sqthread_poll

Vincent Fu (2):
      test: change GitHub Actions macOS platform to macOS 12
      test: use homebrew to install sphinx instead of pip on macOS

 .github/workflows/ci.yml | 2 +-
 HOWTO.rst                | 6 +++---
 ci/actions-install.sh    | 5 +++--
 engines/io_uring.c       | 2 +-
 fio.1                    | 6 +++---
 5 files changed, 11 insertions(+), 10 deletions(-)

---

Diff of recent changes:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1b8c0701..4bc91d3e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
           os: ubuntu-22.04
           cc: clang
         - build: macos
-          os: macos-11
+          os: macos-12
         - build: linux-i686-gcc
           os: ubuntu-22.04
           arch: i686
diff --git a/HOWTO.rst b/HOWTO.rst
index 0fb5593e..e796f961 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2274,7 +2274,7 @@ with the caveat that when used on the command line, they must come after the
 	map and release for each IO. This is more efficient, and reduces the
 	IO latency as well.
 
-.. option:: nonvectored : [io_uring] [io_uring_cmd]
+.. option:: nonvectored=int : [io_uring] [io_uring_cmd]
 
 	With this option, fio will use non-vectored read/write commands, where
 	address must contain the address directly. Default is -1.
@@ -2301,7 +2301,7 @@ with the caveat that when used on the command line, they must come after the
 	This frees up cycles for fio, at the cost of using more CPU in the
 	system.
 
-.. option:: sqthread_poll_cpu : [io_uring] [io_uring_cmd]
+.. option:: sqthread_poll_cpu=int : [io_uring] [io_uring_cmd]
 
 	When :option:`sqthread_poll` is set, this option provides a way to
 	define which CPU should be used for the polling thread.
@@ -2351,7 +2351,7 @@ with the caveat that when used on the command line, they must come after the
 	When hipri is set this determines the probability of a pvsync2 I/O being high
 	priority. The default is 100%.
 
-.. option:: nowait : [pvsync2] [libaio] [io_uring]
+.. option:: nowait=bool : [pvsync2] [libaio] [io_uring] [io_uring_cmd]
 
 	By default if a request cannot be executed immediately (e.g. resource starvation,
 	waiting on locks) it is queued and the initiating process will be blocked until
diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index 82e14d2a..c16dff16 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -84,8 +84,9 @@ install_macos() {
     #echo "Updating homebrew..."
     #brew update >/dev/null 2>&1
     echo "Installing packages..."
-    HOMEBREW_NO_AUTO_UPDATE=1 brew install cunit libnfs
-    pip3 install scipy six sphinx
+    HOMEBREW_NO_AUTO_UPDATE=1 brew install cunit libnfs sphinx-doc
+    brew link sphinx-doc --force
+    pip3 install scipy six 
 }
 
 main() {
diff --git a/engines/io_uring.c b/engines/io_uring.c
index 6906e0a4..3c656b77 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -226,7 +226,7 @@ static struct fio_option options[] = {
 	{
 		.name	= "sqthread_poll",
 		.lname	= "Kernel SQ thread polling",
-		.type	= FIO_OPT_INT,
+		.type	= FIO_OPT_STR_SET,
 		.off1	= offsetof(struct ioring_options, sqpoll_thread),
 		.help	= "Offload submission/completion to kernel thread",
 		.category = FIO_OPT_C_ENGINE,
diff --git a/fio.1 b/fio.1
index 4324a975..9e33c9e1 100644
--- a/fio.1
+++ b/fio.1
@@ -2063,7 +2063,7 @@ release them when IO is done. If this option is set, the pages are pre-mapped
 before IO is started. This eliminates the need to map and release for each IO.
 This is more efficient, and reduces the IO latency as well.
 .TP
-.BI (io_uring,io_uring_cmd)nonvectored
+.BI (io_uring,io_uring_cmd)nonvectored \fR=\fPint
 With this option, fio will use non-vectored read/write commands, where address
 must contain the address directly. Default is -1.
 .TP
@@ -2092,7 +2092,7 @@ available items in the SQ ring. If this option is set, the act of submitting IO
 will be done by a polling thread in the kernel. This frees up cycles for fio, at
 the cost of using more CPU in the system.
 .TP
-.BI (io_uring,io_uring_cmd)sqthread_poll_cpu
+.BI (io_uring,io_uring_cmd)sqthread_poll_cpu \fR=\fPint
 When `sqthread_poll` is set, this option provides a way to define which CPU
 should be used for the polling thread.
 .TP
@@ -2115,7 +2115,7 @@ than normal.
 When hipri is set this determines the probability of a pvsync2 I/O being high
 priority. The default is 100%.
 .TP
-.BI (pvsync2,libaio,io_uring)nowait
+.BI (pvsync2,libaio,io_uring,io_uring_cmd)nowait \fR=\fPbool
 By default if a request cannot be executed immediately (e.g. resource starvation,
 waiting on locks) it is queued and the initiating process will be blocked until
 the required resource becomes free.

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-03 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-03 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 73f168ea2c9a66145559c2217fc5a70c992cb80e:

  HOWTO: update description for flow option (2022-11-01 17:24:34 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 7fc3a553beadd15cac09b1514547c4d382d292d9:

  HOWTO: clean up exit_what description (2022-11-02 10:26:36 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      HOWTO: clean up exit_what description

 HOWTO.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 53ae8c17..0fb5593e 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -3356,10 +3356,10 @@ Threads, processes and job synchronization
 	make fio terminate all jobs in the same group, as soon as one job of that
 	group finishes.
 
-.. option:: exit_what
+.. option:: exit_what=str
 
 	By default, fio will continue running all other jobs when one job finishes.
-	Sometimes this is not the desired action. Setting ``exit_all`` will
+	Sometimes this is not the desired action. Setting ``exitall`` will
 	instead make fio terminate all jobs in the same group. The option
         ``exit_what`` allows to control which jobs get terminated when ``exitall`` is
         enabled. The default is ``group`` and does not change the behaviour of

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-11-02 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-11-02 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c4704c081a54160621227b42238f6e439c28fba3:

  test: add test for experimental verify with loops and time_based options (2022-10-24 10:34:57 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 73f168ea2c9a66145559c2217fc5a70c992cb80e:

  HOWTO: update description for flow option (2022-11-01 17:24:34 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      HOWTO: update description for flow option

 HOWTO.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index e89d05f0..53ae8c17 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -3329,13 +3329,13 @@ Threads, processes and job synchronization
 
 .. option:: flow=int
 
-	Weight in token-based flow control. If this value is used, then there is a
-	'flow counter' which is used to regulate the proportion of activity between
-	two or more jobs. Fio attempts to keep this flow counter near zero. The
-	``flow`` parameter stands for how much should be added or subtracted to the
-	flow counter on each iteration of the main I/O loop. That is, if one job has
-	``flow=8`` and another job has ``flow=-1``, then there will be a roughly 1:8
-	ratio in how much one runs vs the other.
+        Weight in token-based flow control. If this value is used, then fio
+        regulates the activity between two or more jobs sharing the same
+        flow_id. Fio attempts to keep each job activity proportional to other
+        jobs' activities in the same flow_id group, with respect to requested
+        weight per job. That is, if one job has ``flow=3``, another job has
+        ``flow=2`` and a third has ``flow=1``, then there will be a roughly 3:2:1
+        ratio in how much one runs vs the others.
 
 .. option:: flow_sleep=int
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-25 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-25 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d72244761b2230fbb2d6eaec59cdedd3ea651d4f:

  stat: fix segfault with fio option --bandwidth-log (2022-10-21 13:23:41 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c4704c081a54160621227b42238f6e439c28fba3:

  test: add test for experimental verify with loops and time_based options (2022-10-24 10:34:57 -0400)

----------------------------------------------------------------
Shin'ichiro Kawasaki (4):
      verify: fix bytes_done accounting of experimental verify
      verify: fix numberio accounting of experimental verify
      test: add test for verify read back of experimental verify
      test: add test for experimental verify with loops and time_based options

Vincent Fu (1):
      Merge branch 'fix-cpus_allowed' of https://github.com/roxma/fio

mayuanpeng (1):
      cpus_allowed: use __NRPROCESSORS_CONF instead of __SC_NPROCESSORS_ONLN for non-sequential CPU ids

 backend.c                 |  8 ++++++--
 fio.h                     |  2 ++
 gettime.c                 |  2 +-
 idletime.c                |  2 +-
 io_u.c                    | 23 +++++++++++++++++------
 libfio.c                  |  1 +
 options.c                 |  8 ++++----
 os/os-hpux.h              |  4 ++--
 os/os-linux.h             |  8 --------
 os/os-solaris.h           |  2 +-
 os/os-windows.h           |  5 +----
 os/os.h                   |  8 ++++----
 os/windows/cpu-affinity.c |  6 ------
 os/windows/posix.c        | 16 ++++++++++++----
 rate-submit.c             |  2 ++
 server.c                  |  2 +-
 t/dedupe.c                |  2 +-
 t/jobs/t0025.fio          |  7 +++++++
 t/jobs/t0026.fio          | 19 +++++++++++++++++++
 t/run-fio-tests.py        | 31 +++++++++++++++++++++++++++++++
 verify.c                  |  2 --
 21 files changed, 113 insertions(+), 47 deletions(-)
 create mode 100644 t/jobs/t0025.fio
 create mode 100644 t/jobs/t0026.fio

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index d8f4f2a5..ba954a6b 100644
--- a/backend.c
+++ b/backend.c
@@ -682,7 +682,7 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
 				break;
 			}
 		} else {
-			if (ddir_rw_sum(td->bytes_done) + td->o.rw_min_bs > verify_bytes)
+			if (td->bytes_verified + td->o.rw_min_bs > verify_bytes)
 				break;
 
 			while ((io_u = get_io_u(td)) != NULL) {
@@ -711,6 +711,8 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
 					break;
 				} else if (io_u->ddir == DDIR_WRITE) {
 					io_u->ddir = DDIR_READ;
+					io_u->numberio = td->verify_read_issues;
+					td->verify_read_issues++;
 					populate_verify_io_u(td, io_u);
 					break;
 				} else {
@@ -1030,8 +1032,10 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
 			break;
 		}
 
-		if (io_u->ddir == DDIR_WRITE && td->flags & TD_F_DO_VERIFY)
+		if (io_u->ddir == DDIR_WRITE && td->flags & TD_F_DO_VERIFY) {
+			io_u->numberio = td->io_issues[io_u->ddir];
 			populate_verify_io_u(td, io_u);
+		}
 
 		ddir = io_u->ddir;
 
diff --git a/fio.h b/fio.h
index de7eca79..8da77640 100644
--- a/fio.h
+++ b/fio.h
@@ -356,6 +356,7 @@ struct thread_data {
 	 * Issue side
 	 */
 	uint64_t io_issues[DDIR_RWDIR_CNT];
+	uint64_t verify_read_issues;
 	uint64_t io_issue_bytes[DDIR_RWDIR_CNT];
 	uint64_t loops;
 
@@ -370,6 +371,7 @@ struct thread_data {
 	uint64_t zone_bytes;
 	struct fio_sem *sem;
 	uint64_t bytes_done[DDIR_RWDIR_CNT];
+	uint64_t bytes_verified;
 
 	uint64_t *thinktime_blocks_counter;
 	struct timespec last_thinktime;
diff --git a/gettime.c b/gettime.c
index 8993be16..bc66a3ac 100644
--- a/gettime.c
+++ b/gettime.c
@@ -671,7 +671,7 @@ static int clock_cmp(const void *p1, const void *p2)
 int fio_monotonic_clocktest(int debug)
 {
 	struct clock_thread *cthreads;
-	unsigned int seen_cpus, nr_cpus = cpus_online();
+	unsigned int seen_cpus, nr_cpus = cpus_configured();
 	struct clock_entry *entries;
 	unsigned long nr_entries, tentries, failed = 0;
 	struct clock_entry *prev, *this;
diff --git a/idletime.c b/idletime.c
index fc1df8e9..90ed77ea 100644
--- a/idletime.c
+++ b/idletime.c
@@ -189,7 +189,7 @@ void fio_idle_prof_init(void)
 	pthread_condattr_t cattr;
 	struct idle_prof_thread *ipt;
 
-	ipc.nr_cpus = cpus_online();
+	ipc.nr_cpus = cpus_configured();
 	ipc.status = IDLE_PROF_STATUS_OK;
 
 	if (ipc.opt == IDLE_PROF_OPT_NONE)
diff --git a/io_u.c b/io_u.c
index 91f1a358..8035f4b7 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2121,13 +2121,26 @@ static void ios_completed(struct thread_data *td,
 	}
 }
 
+static void io_u_update_bytes_done(struct thread_data *td,
+				   struct io_completion_data *icd)
+{
+	int ddir;
+
+	if (td->runstate == TD_VERIFYING) {
+		td->bytes_verified += icd->bytes_done[DDIR_READ];
+		return;
+	}
+
+	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+		td->bytes_done[ddir] += icd->bytes_done[ddir];
+}
+
 /*
  * Complete a single io_u for the sync engines.
  */
 int io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
 {
 	struct io_completion_data icd;
-	int ddir;
 
 	init_icd(td, &icd, 1);
 	io_completed(td, &io_u, &icd);
@@ -2140,8 +2153,7 @@ int io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
 		return -1;
 	}
 
-	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
-		td->bytes_done[ddir] += icd.bytes_done[ddir];
+	io_u_update_bytes_done(td, &icd);
 
 	return 0;
 }
@@ -2153,7 +2165,7 @@ int io_u_queued_complete(struct thread_data *td, int min_evts)
 {
 	struct io_completion_data icd;
 	struct timespec *tvp = NULL;
-	int ret, ddir;
+	int ret;
 	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
 
 	dprint(FD_IO, "io_u_queued_complete: min=%d\n", min_evts);
@@ -2179,8 +2191,7 @@ int io_u_queued_complete(struct thread_data *td, int min_evts)
 		return -1;
 	}
 
-	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
-		td->bytes_done[ddir] += icd.bytes_done[ddir];
+	io_u_update_bytes_done(td, &icd);
 
 	return ret;
 }
diff --git a/libfio.c b/libfio.c
index 1a891776..ac521974 100644
--- a/libfio.c
+++ b/libfio.c
@@ -94,6 +94,7 @@ static void reset_io_counters(struct thread_data *td, int all)
 			td->rate_next_io_time[ddir] = 0;
 			td->last_usec[ddir] = 0;
 		}
+		td->bytes_verified = 0;
 	}
 
 	td->zone_bytes = 0;
diff --git a/options.c b/options.c
index a668b0e4..9e4d8cd1 100644
--- a/options.c
+++ b/options.c
@@ -627,7 +627,7 @@ static int str_exitall_cb(void)
 int fio_cpus_split(os_cpu_mask_t *mask, unsigned int cpu_index)
 {
 	unsigned int i, index, cpus_in_mask;
-	const long max_cpu = cpus_online();
+	const long max_cpu = cpus_configured();
 
 	cpus_in_mask = fio_cpu_count(mask);
 	if (!cpus_in_mask)
@@ -666,7 +666,7 @@ static int str_cpumask_cb(void *data, unsigned long long *val)
 		return 1;
 	}
 
-	max_cpu = cpus_online();
+	max_cpu = cpus_configured();
 
 	for (i = 0; i < sizeof(int) * 8; i++) {
 		if ((1 << i) & *val) {
@@ -702,7 +702,7 @@ static int set_cpus_allowed(struct thread_data *td, os_cpu_mask_t *mask,
 	strip_blank_front(&str);
 	strip_blank_end(str);
 
-	max_cpu = cpus_online();
+	max_cpu = cpus_configured();
 
 	while ((cpu = strsep(&str, ",")) != NULL) {
 		char *str2, *cpu2;
@@ -5305,7 +5305,7 @@ void fio_keywords_init(void)
 	sprintf(buf, "%llu", mb_memory);
 	fio_keywords[1].replace = strdup(buf);
 
-	l = cpus_online();
+	l = cpus_configured();
 	sprintf(buf, "%lu", l);
 	fio_keywords[2].replace = strdup(buf);
 }
diff --git a/os/os-hpux.h b/os/os-hpux.h
index a80cb2bc..9f3d76f5 100644
--- a/os/os-hpux.h
+++ b/os/os-hpux.h
@@ -88,9 +88,9 @@ static inline unsigned long long os_phys_mem(void)
 	return ret;
 }
 
-#define FIO_HAVE_CPU_ONLINE_SYSCONF
+#define FIO_HAVE_CPU_CONF_SYSCONF
 
-static inline unsigned int cpus_online(void)
+static inline unsigned int cpus_configured(void)
 {
 	return mpctl(MPC_GETNUMSPUS, 0, NULL);
 }
diff --git a/os/os-linux.h b/os/os-linux.h
index 831f0ad0..bbb1f27c 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -251,14 +251,6 @@ static inline int arch_cache_line_size(void)
 		return atoi(size);
 }
 
-#ifdef __powerpc64__
-#define FIO_HAVE_CPU_ONLINE_SYSCONF
-static inline unsigned int cpus_online(void)
-{
-        return sysconf(_SC_NPROCESSORS_CONF);
-}
-#endif
-
 static inline unsigned long long get_fs_free_size(const char *path)
 {
 	unsigned long long ret;
diff --git a/os/os-solaris.h b/os/os-solaris.h
index ea1f081c..60d4c1ec 100644
--- a/os/os-solaris.h
+++ b/os/os-solaris.h
@@ -119,7 +119,7 @@ static inline int fio_set_odirect(struct fio_file *f)
 
 static inline bool fio_cpu_isset(os_cpu_mask_t *mask, int cpu)
 {
-	const unsigned int max_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	const unsigned int max_cpus = sysconf(_SC_NPROCESSORS_CONF);
 	unsigned int num_cpus;
 	processorid_t *cpus;
 	bool ret;
diff --git a/os/os-windows.h b/os/os-windows.h
index 510b8143..12f33486 100644
--- a/os/os-windows.h
+++ b/os/os-windows.h
@@ -44,7 +44,7 @@
 #define fio_swap64(x)	_byteswap_uint64(x)
 
 #define _SC_PAGESIZE			0x1
-#define _SC_NPROCESSORS_ONLN	0x2
+#define _SC_NPROCESSORS_CONF	0x2
 #define _SC_PHYS_PAGES			0x4
 
 #define SA_RESTART	0
@@ -219,9 +219,6 @@ static inline int fio_mkdir(const char *path, mode_t mode) {
 	return 0;
 }
 
-#define FIO_HAVE_CPU_ONLINE_SYSCONF
-unsigned int cpus_online(void);
-
 int first_set_cpu(os_cpu_mask_t *cpumask);
 int fio_setaffinity(int pid, os_cpu_mask_t cpumask);
 int fio_cpuset_init(os_cpu_mask_t *mask);
diff --git a/os/os.h b/os/os.h
index aba6813f..a6fde1fd 100644
--- a/os/os.h
+++ b/os/os.h
@@ -352,10 +352,10 @@ static inline unsigned long long get_fs_free_size(const char *path)
 }
 #endif
 
-#ifndef FIO_HAVE_CPU_ONLINE_SYSCONF
-static inline unsigned int cpus_online(void)
+#ifndef FIO_HAVE_CPU_CONF_SYSCONF
+static inline unsigned int cpus_configured(void)
 {
-	return sysconf(_SC_NPROCESSORS_ONLN);
+	return sysconf(_SC_NPROCESSORS_CONF);
 }
 #endif
 
@@ -363,7 +363,7 @@ static inline unsigned int cpus_online(void)
 #ifdef FIO_HAVE_CPU_AFFINITY
 static inline int CPU_COUNT(os_cpu_mask_t *mask)
 {
-	int max_cpus = cpus_online();
+	int max_cpus = cpus_configured();
 	int nr_cpus, i;
 
 	for (i = 0, nr_cpus = 0; i < max_cpus; i++)
diff --git a/os/windows/cpu-affinity.c b/os/windows/cpu-affinity.c
index 7601970f..8f3d6a76 100644
--- a/os/windows/cpu-affinity.c
+++ b/os/windows/cpu-affinity.c
@@ -2,12 +2,6 @@
 
 #include <windows.h>
 
-/* Return all processors regardless of processor group */
-unsigned int cpus_online(void)
-{
-	return GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
-}
-
 static void print_mask(os_cpu_mask_t *cpumask)
 {
 	for (int i = 0; i < FIO_CPU_MASK_ROWS; i++)
diff --git a/os/windows/posix.c b/os/windows/posix.c
index a3a6c89f..a47223da 100644
--- a/os/windows/posix.c
+++ b/os/windows/posix.c
@@ -216,10 +216,18 @@ long sysconf(int name)
 	MEMORYSTATUSEX status;
 
 	switch (name) {
-	case _SC_NPROCESSORS_ONLN:
-		val = GetNumLogicalProcessors();
+	case _SC_NPROCESSORS_CONF:
+		/*
+		 * Using GetMaximumProcessorCount introduces a problem in
+		 * gettime.c because Windows does not have
+		 * fio_get_thread_affinity. Log sample (see #1479):
+		 *
+		 *   CPU mask contains processor beyond last active processor index (2)
+		 *   clock setaffinity failed: No error
+		 */
+		val = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
 		if (val == -1)
-			log_err("sysconf(_SC_NPROCESSORS_ONLN) failed\n");
+			log_err("sysconf(_SC_NPROCESSORS_CONF) failed\n");
 
 		break;
 
@@ -1201,4 +1209,4 @@ cleanup:
 	DisconnectNamedPipe(hpipe);
 	CloseHandle(hpipe);
 	return ret;
-}
\ No newline at end of file
+}
diff --git a/rate-submit.c b/rate-submit.c
index 268356d1..2fe768c0 100644
--- a/rate-submit.c
+++ b/rate-submit.c
@@ -263,6 +263,8 @@ static void sum_ddir(struct thread_data *dst, struct thread_data *src,
 	sum_val(&dst->this_io_blocks[ddir], &src->this_io_blocks[ddir]);
 	sum_val(&dst->this_io_bytes[ddir], &src->this_io_bytes[ddir]);
 	sum_val(&dst->bytes_done[ddir], &src->bytes_done[ddir]);
+	if (ddir == DDIR_READ)
+		sum_val(&dst->bytes_verified, &src->bytes_verified);
 
 	pthread_double_unlock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock);
 }
diff --git a/server.c b/server.c
index b453be5f..b869d387 100644
--- a/server.c
+++ b/server.c
@@ -999,7 +999,7 @@ static int handle_probe_cmd(struct fio_net_cmd *cmd)
 		.os		= FIO_OS,
 		.arch		= FIO_ARCH,
 		.bpp		= sizeof(void *),
-		.cpus		= __cpu_to_le32(cpus_online()),
+		.cpus		= __cpu_to_le32(cpus_configured()),
 	};
 
 	dprint(FD_NET, "server: sending probe reply\n");
diff --git a/t/dedupe.c b/t/dedupe.c
index d21e96f4..02e52b74 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -688,7 +688,7 @@ int main(int argc, char *argv[])
 		use_bloom = 0;
 
 	if (!num_threads)
-		num_threads = cpus_online();
+		num_threads = cpus_configured();
 
 	if (argc == optind)
 		return usage(argv);
diff --git a/t/jobs/t0025.fio b/t/jobs/t0025.fio
new file mode 100644
index 00000000..29b5fe80
--- /dev/null
+++ b/t/jobs/t0025.fio
@@ -0,0 +1,7 @@
+[job]
+filename=t0025file
+size=128k
+readwrite=write
+do_verify=1
+verify=md5
+experimental_verify=1
diff --git a/t/jobs/t0026.fio b/t/jobs/t0026.fio
new file mode 100644
index 00000000..ee89b140
--- /dev/null
+++ b/t/jobs/t0026.fio
@@ -0,0 +1,19 @@
+[job1]
+filename=t0026file
+size=1M
+readwrite=randwrite
+loops=8
+do_verify=1
+verify=md5
+experimental_verify=1
+
+[job2]
+stonewall=1
+filename=t0026file
+size=1M
+readwrite=randrw
+time_based
+runtime=5
+do_verify=1
+verify=md5
+experimental_verify=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index df87ae72..e5b307ac 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -788,6 +788,18 @@ class FioJobTest_t0024(FioJobTest_t0023):
         self.check_all_offsets("bssplit_bw.log", 512, filesize)
 
 
+class FioJobTest_t0025(FioJobTest):
+    """Test experimental verify read backs written data pattern."""
+    def check_result(self):
+        super(FioJobTest_t0025, self).check_result()
+
+        if not self.passed:
+            return
+
+        if self.json_data['jobs'][0]['read']['io_kbytes'] != 128:
+            self.passed = False
+
+
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
     Confirm that job0 iops == 1000
@@ -1183,6 +1195,25 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [],
     },
+    {
+        'test_id':          25,
+        'test_class':       FioJobTest_t0025,
+        'job':              't0025.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [],
+    },
+    {
+        'test_id':          26,
+        'test_class':       FioJobTest,
+        'job':              't0026.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [Requirements.not_windows],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,
diff --git a/verify.c b/verify.c
index 0e1e4639..d6a229ca 100644
--- a/verify.c
+++ b/verify.c
@@ -1287,8 +1287,6 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u)
 	if (td->o.verify == VERIFY_NULL)
 		return;
 
-	io_u->numberio = td->io_issues[io_u->ddir];
-
 	fill_pattern_headers(td, io_u, 0, 0);
 }
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-22 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-22 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 548f1269e3772c666cac4148453d9c63bdfa65c4:

  Merge branch 'issue-1213' of https://github.com/SystemFabricWorks/fio (2022-10-19 12:04:50 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d72244761b2230fbb2d6eaec59cdedd3ea651d4f:

  stat: fix segfault with fio option --bandwidth-log (2022-10-21 13:23:41 -0400)

----------------------------------------------------------------
Ankit Kumar (1):
      stat: fix segfault with fio option --bandwidth-log

 stat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/stat.c b/stat.c
index 949af5ed..b963973a 100644
--- a/stat.c
+++ b/stat.c
@@ -2870,7 +2870,10 @@ static struct io_logs *get_new_log(struct io_log *iolog)
 	 * forever
 	 */
 	if (!iolog->cur_log_max) {
-		new_samples = iolog->td->o.log_entries;
+		if (iolog->td)
+			new_samples = iolog->td->o.log_entries;
+		else
+			new_samples = DEF_LOG_ENTRIES;
 	} else {
 		new_samples = iolog->cur_log_max * 2;
 		if (new_samples > MAX_LOG_ENTRIES)

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-20 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-20 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 2f160e0c8848bab566427a11eee116d8e834bcf0:

  test: change GitHub actions checkout from v2 to v3 (2022-10-18 11:13:03 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 548f1269e3772c666cac4148453d9c63bdfa65c4:

  Merge branch 'issue-1213' of https://github.com/SystemFabricWorks/fio (2022-10-19 12:04:50 -0400)

----------------------------------------------------------------
Brian T. Smith (2):
      fix configure probe for libcufile
      libcufile: use generic_get_file_size

Vincent Fu (1):
      Merge branch 'issue-1213' of https://github.com/SystemFabricWorks/fio

 configure           | 4 ++--
 engines/libcufile.c | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 546541a2..24c599a8 100755
--- a/configure
+++ b/configure
@@ -2723,9 +2723,9 @@ int main(int argc, char* argv[]) {
    return 0;
 }
 EOF
-  if compile_prog "" "-lcuda -lcudart -lcufile" "libcufile"; then
+  if compile_prog "" "-lcuda -lcudart -lcufile -ldl" "libcufile"; then
     libcufile="yes"
-    LIBS="-lcuda -lcudart -lcufile $LIBS"
+    LIBS="-lcuda -lcudart -lcufile -ldl $LIBS"
   else
     if test "$libcufile" = "yes" ; then
       feature_not_found "libcufile" ""
diff --git a/engines/libcufile.c b/engines/libcufile.c
index e575b786..2bedf261 100644
--- a/engines/libcufile.c
+++ b/engines/libcufile.c
@@ -606,6 +606,7 @@ FIO_STATIC struct ioengine_ops ioengine = {
 	.version             = FIO_IOOPS_VERSION,
 	.init                = fio_libcufile_init,
 	.queue               = fio_libcufile_queue,
+	.get_file_size       = generic_get_file_size,
 	.open_file           = fio_libcufile_open_file,
 	.close_file          = fio_libcufile_close_file,
 	.iomem_alloc         = fio_libcufile_iomem_alloc,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-19 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-19 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 0360d61fbfcc1f07bcdc16672f5040f8cf49681f:

  t/zbd: add a CLI option to force io_uring (2022-10-16 17:05:03 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 2f160e0c8848bab566427a11eee116d8e834bcf0:

  test: change GitHub actions checkout from v2 to v3 (2022-10-18 11:13:03 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      test: change GitHub actions checkout from v2 to v3

 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bdc4db85..1b8c0701 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,7 +39,7 @@ jobs:
 
     steps:
     - name: Checkout repo
-      uses: actions/checkout@v2
+      uses: actions/checkout@v3
     - name: Install dependencies
       run: ./ci/actions-install.sh
     - name: Build

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-17 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-17 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 8a63e7a32fcb6b7b131c4678ba95b81a9f2f8bca:

  Merge branch 'readme-update' of https://github.com/nikoandpiko/fio (2022-10-15 09:05:32 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 0360d61fbfcc1f07bcdc16672f5040f8cf49681f:

  t/zbd: add a CLI option to force io_uring (2022-10-16 17:05:03 -0600)

----------------------------------------------------------------
Alexey Dobriyan (1):
      fio: warn about "ioengine=psync" and "iodepth >= 1"

Dmitry Fomichev (2):
      t/zbd: fix max_open_zones determination in tests
      t/zbd: add a CLI option to force io_uring

 backend.c              |  5 +++++
 t/zbd/functions        |  4 +++-
 t/zbd/test-zbd-support | 10 ++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index ec535bcc..d8f4f2a5 100644
--- a/backend.c
+++ b/backend.c
@@ -1791,6 +1791,11 @@ static void *thread_main(void *data)
 	if (td_io_init(td))
 		goto err;
 
+	if (td_ioengine_flagged(td, FIO_SYNCIO) && td->o.iodepth > 1) {
+		log_info("note: both iodepth >= 1 and synchronous I/O engine "
+			 "are selected, queue depth will be capped at 1\n");
+	}
+
 	if (init_io_u(td))
 		goto err;
 
diff --git a/t/zbd/functions b/t/zbd/functions
index 7cff18fd..812320f5 100644
--- a/t/zbd/functions
+++ b/t/zbd/functions
@@ -230,9 +230,11 @@ max_open_zones() {
 		    echo ${max_nr_open_zones}
 		}
 	fi
-    else
+    elif [ -n "${use_libzbc}" ]; then
 	${zbc_report_zones} "$dev" |
 	    sed -n 's/^[[:blank:]]*Maximum number of open sequential write required zones:[[:blank:]]*//p'
+    else
+	echo 0
     fi
 }
 
diff --git a/t/zbd/test-zbd-support b/t/zbd/test-zbd-support
index d4aaa813..cdc03f28 100755
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -17,6 +17,7 @@ usage() {
 	echo -e "\t-t <test #> Run only a single test case with specified number"
 	echo -e "\t-q Quit the test run after any failed test"
 	echo -e "\t-z Run fio with debug=zbd option"
+	echo -e "\t-u Use io_uring ioengine in place of libaio"
 }
 
 max() {
@@ -38,6 +39,8 @@ min() {
 ioengine() {
 	if [ -n "$use_libzbc" ]; then
 		echo -n "--ioengine=libzbc"
+	elif [ "$1" = "libaio" -a -n "$force_io_uring" ]; then
+		echo -n "--ioengine=io_uring"
 	else
 		echo -n "--ioengine=$1"
 	fi
@@ -1275,6 +1278,7 @@ use_libzbc=
 zbd_debug=
 max_open_zones_opt=
 quit_on_err=
+force_io_uring=
 
 while [ "${1#-}" != "$1" ]; do
   case "$1" in
@@ -1292,6 +1296,7 @@ while [ "${1#-}" != "$1" ]; do
 	shift;;
     -q) quit_on_err=1; shift;;
     -z) zbd_debug=1; shift;;
+    -u) force_io_uring=1; shift;;
     --) shift; break;;
      *) usage; exit 1;;
   esac
@@ -1302,6 +1307,11 @@ if [ $# != 1 ]; then
     exit 1
 fi
 
+if [ -n "$use_libzbc" -a -n "$force_io_uring" ]; then
+    echo "Please specify only one of -l and -u options"
+    exit 1
+fi
+
 # shellcheck source=functions
 source "$(dirname "$0")/functions" || exit $?
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-16 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-16 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 07f78c37833730594778fb5684ac6ec40d0289f8:

  engines/io_uring: set coop taskrun, single issuer and defer taskrun (2022-10-12 07:19:35 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 8a63e7a32fcb6b7b131c4678ba95b81a9f2f8bca:

  Merge branch 'readme-update' of https://github.com/nikoandpiko/fio (2022-10-15 09:05:32 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'readme-update' of https://github.com/nikoandpiko/fio

Nicholas Roma (1):
      Update to README

 README.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/README.rst b/README.rst
index 79582dea..bcd08ec9 100644
--- a/README.rst
+++ b/README.rst
@@ -150,7 +150,7 @@ GNU make isn't the default, type ``gmake`` instead of ``make``.
 
 Configure will print the enabled options. Note that on Linux based platforms,
 the libaio development packages must be installed to use the libaio
-engine. Depending on distro, it is usually called libaio-devel or libaio-dev.
+engine. Depending on the distro, it is usually called libaio-devel or libaio-dev.
 
 For gfio, gtk 2.18 (or newer), associated glib threads, and cairo are required
 to be installed.  gfio isn't built automatically and can be enabled with a
@@ -170,7 +170,7 @@ configure.
 Windows
 ~~~~~~~
 
-The minimum versions of Windows for building/runing fio are Windows 7/Windows
+The minimum versions of Windows for building/running fio are Windows 7/Windows
 Server 2008 R2. On Windows, Cygwin (https://www.cygwin.com/) is required in
 order to build fio. To create an MSI installer package install WiX from
 https://wixtoolset.org and run :file:`dobuild.cmd` from the :file:`os/windows`
@@ -224,7 +224,7 @@ implemented, I'd be happy to take patches for that. An example of that is disk
 utility statistics and (I think) huge page support, support for that does exist
 in FreeBSD/Solaris.
 
-Fio uses pthread mutexes for signalling and locking and some platforms do not
+Fio uses pthread mutexes for signaling and locking and some platforms do not
 support process shared pthread mutexes. As a result, on such platforms only
 threads are supported. This could be fixed with sysv ipc locking or other
 locking alternatives.

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-15 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-15 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit b19c5ee1357ffb74f4de57b1617364bbbaacf1a0:

  examples: uring-cmd-zoned: expand the reasoning behind QD1 (2022-10-07 09:50:37 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 07f78c37833730594778fb5684ac6ec40d0289f8:

  engines/io_uring: set coop taskrun, single issuer and defer taskrun (2022-10-12 07:19:35 -0600)

----------------------------------------------------------------
Ankit Kumar (1):
      engines/io_uring: set coop taskrun, single issuer and defer taskrun

 engines/io_uring.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

---

Diff of recent changes:

diff --git a/engines/io_uring.c b/engines/io_uring.c
index c679177f..6906e0a4 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -889,9 +889,30 @@ static int fio_ioring_cmd_queue_init(struct thread_data *td)
 	p.flags |= IORING_SETUP_CQSIZE;
 	p.cq_entries = depth;
 
+	/*
+	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
+	 * completing IO operations.
+	 */
+	p.flags |= IORING_SETUP_COOP_TASKRUN;
+
+	/*
+	 * io_uring is always a single issuer, and we can defer task_work
+	 * runs until we reap events.
+	 */
+	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+
 retry:
 	ret = syscall(__NR_io_uring_setup, depth, &p);
 	if (ret < 0) {
+		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
+			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
+			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
+			goto retry;
+		}
+		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
+			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
+			goto retry;
+		}
 		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
 			p.flags &= ~IORING_SETUP_CQSIZE;
 			goto retry;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-08 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-08 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 7aeb498947d6f2d6c96b571520f12b80365fa8a1:

  test: make t0014.fio time_based (2022-10-05 18:34:41 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to b19c5ee1357ffb74f4de57b1617364bbbaacf1a0:

  examples: uring-cmd-zoned: expand the reasoning behind QD1 (2022-10-07 09:50:37 -0400)

----------------------------------------------------------------
Pankaj Raghav (1):
      examples: uring-cmd-zoned: expand the reasoning behind QD1

 examples/uring-cmd-zoned.fio | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/examples/uring-cmd-zoned.fio b/examples/uring-cmd-zoned.fio
index 58e8f79e..89be61be 100644
--- a/examples/uring-cmd-zoned.fio
+++ b/examples/uring-cmd-zoned.fio
@@ -1,7 +1,11 @@
 # io_uring_cmd I/O engine for nvme-ns generic zoned character device
 #
-# NOTE: with write workload iodepth must be set to 1 as there is no IO
-# scheduler.
+# NOTE:
+# Regular writes against a zone should be limited to QD1, as the device can
+# reorder the requests.
+#
+# As the passthrough path does not use an IO scheduler (such as mq-deadline),
+# the queue depth should be limited to 1 to avoid zone invalid writes.
 
 [global]
 filename=/dev/ng0n1

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-06 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-06 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 0474b83f022f1f1cc14208c05b7ccda682e01263:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-10-04 14:25:09 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 7aeb498947d6f2d6c96b571520f12b80365fa8a1:

  test: make t0014.fio time_based (2022-10-05 18:34:41 -0400)

----------------------------------------------------------------
Vincent Fu (7):
      test: clean up randtrimwrite test
      test: check all offsets touched for randtrimwrite
      test: fix style issues in run-fio-tests.py
      test: add basic tests for trimwrite workloads
      test: fix t/run-fio-tests.py style issues identified by pylint
      test: improve run-fio-tests.py file open method
      test: make t0014.fio time_based

 t/jobs/t0014.fio   |   1 +
 t/jobs/t0023.fio   |  16 +---
 t/jobs/t0024.fio   |  36 +++++++++
 t/run-fio-tests.py | 209 ++++++++++++++++++++++++++++++++++++++---------------
 4 files changed, 190 insertions(+), 72 deletions(-)
 create mode 100644 t/jobs/t0024.fio

---

Diff of recent changes:

diff --git a/t/jobs/t0014.fio b/t/jobs/t0014.fio
index d9b45651..eb13478b 100644
--- a/t/jobs/t0014.fio
+++ b/t/jobs/t0014.fio
@@ -17,6 +17,7 @@ flow_id=1
 thread
 log_avg_msec=1000
 write_iops_log=t0014.fio
+time_based
 
 [flow1]
 flow=1
diff --git a/t/jobs/t0023.fio b/t/jobs/t0023.fio
index 0250ee1a..4f0bef89 100644
--- a/t/jobs/t0023.fio
+++ b/t/jobs/t0023.fio
@@ -6,29 +6,26 @@ rw=randtrimwrite
 log_offset=1
 per_job_logs=0
 randrepeat=0
-stonewall
+write_bw_log
 
 # Expected result: 	trim issued to random offset followed by write to same offset
 # 			all offsets touched
 # 			block sizes match
 # Buggy result: 	something else
 [basic]
-write_bw_log
 
 # Expected result: 	trim issued to random offset followed by write to same offset
 # 			all offsets trimmed
 # 			block sizes 8k for both write and trim
 # Buggy result: 	something else
 [bs]
-write_bw_log
-bs=4k,4k,8k
+bs=8k,8k,8k
 
 # Expected result: 	trim issued to random offset followed by write to same offset
 # 			all offsets trimmed
 # 			block sizes match
 # Buggy result: 	something else
 [bsrange]
-write_bw_log
 bsrange=512-4k
 
 # Expected result: 	trim issued to random offset followed by write to same offset
@@ -36,40 +33,31 @@ bsrange=512-4k
 # 			block sizes match
 # Buggy result: 	something else
 [bssplit]
-write_bw_log
 bsrange=512/25:1k:25:2k:25:4k/25
 
 # Expected result: 	trim issued to random offset followed by write to same offset
-# 			all offsets touched
 # 			block sizes match
 # Buggy result: 	something else
 [basic_no_rm]
-write_bw_log
 norandommap=1
 
 # Expected result: 	trim issued to random offset followed by write to same offset
-# 			all offsets trimmed
 # 			block sizes 8k for both write and trim
 # Buggy result: 	something else
 [bs_no_rm]
-write_bw_log
 bs=4k,4k,8k
 norandommap=1
 
 # Expected result: 	trim issued to random offset followed by write to same offset
-# 			all offsets trimmed
 # 			block sizes match
 # Buggy result: 	something else
 [bsrange_no_rm]
-write_bw_log
 bsrange=512-4k
 norandommap=1
 
 # Expected result: 	trim issued to random offset followed by write to same offset
-# 			all offsets trimmed
 # 			block sizes match
 # Buggy result: 	something else
 [bssplit_no_rm]
-write_bw_log
 bsrange=512/25:1k:25:2k:25:4k/25
 norandommap=1
diff --git a/t/jobs/t0024.fio b/t/jobs/t0024.fio
new file mode 100644
index 00000000..393a2b70
--- /dev/null
+++ b/t/jobs/t0024.fio
@@ -0,0 +1,36 @@
+# trimwrite data direction tests
+[global]
+filesize=1M
+ioengine=null
+rw=trimwrite
+log_offset=1
+per_job_logs=0
+randrepeat=0
+write_bw_log
+
+# Expected result: 	trim issued to sequential offsets followed by write to same offset
+# 			all offsets touched
+# 			block sizes match
+# Buggy result: 	something else
+[basic]
+
+# Expected result: 	trim issued to sequential offsets followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes 8k for both write and trim
+# Buggy result: 	something else
+[bs]
+bs=8k,8k,8k
+
+# Expected result: 	trim issued to sequential offsets followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bsrange]
+bsrange=512-4k
+
+# Expected result: 	trim issued to sequential offsets followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bssplit]
+bsrange=512/25:1k:25:2k:25:4k/25
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index a2b036d9..df87ae72 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -55,7 +55,7 @@ import multiprocessing
 from pathlib import Path
 
 
-class FioTest(object):
+class FioTest():
     """Base for all fio tests."""
 
     def __init__(self, exe_path, parameters, success):
@@ -286,6 +286,19 @@ class FioJobTest(FioExeTest):
 
         return file_data, success
 
+    def get_file_fail(self, filename):
+        """Safely read a file and fail the test upon error."""
+        file_data = None
+
+        try:
+            with open(filename, "r") as output_file:
+                file_data = output_file.read()
+        except OSError:
+            self.failure_reason += " unable to read file {0}".format(filename)
+            self.passed = False
+
+        return file_data
+
     def check_result(self):
         """Check fio job results."""
 
@@ -302,10 +315,8 @@ class FioJobTest(FioExeTest):
         if 'json' not in self.output_format:
             return
 
-        file_data, success = self.get_file(os.path.join(self.test_dir, self.fio_output))
-        if not success:
-            self.failure_reason = "{0} unable to open output file,".format(self.failure_reason)
-            self.passed = False
+        file_data = self.get_file_fail(os.path.join(self.test_dir, self.fio_output))
+        if not file_data:
             return
 
         #
@@ -427,12 +438,11 @@ class FioJobTest_t0012(FioJobTest):
             return
 
         iops_files = []
-        for i in range(1,4):
-            file_data, success = self.get_file(os.path.join(self.test_dir, "{0}_iops.{1}.log".format(os.path.basename(self.fio_job), i)))
-
-            if not success:
-                self.failure_reason = "{0} unable to open output file,".format(self.failure_reason)
-                self.passed = False
+        for i in range(1, 4):
+            filename = os.path.join(self.test_dir, "{0}_iops.{1}.log".format(os.path.basename(
+                self.fio_job), i))
+            file_data = self.get_file_fail(filename)
+            if not file_data:
                 return
 
             iops_files.append(file_data.splitlines())
@@ -448,17 +458,15 @@ class FioJobTest_t0012(FioJobTest):
 
             ratio1 = iops3/iops2
             ratio2 = iops3/iops1
-            logging.debug(
-                "sample {0}: job1 iops={1} job2 iops={2} job3 iops={3} job3/job2={4:.3f} job3/job1={5:.3f}".format(
-                    i, iops1, iops2, iops3, ratio1, ratio2
-                )
-            )
+            logging.debug("sample {0}: job1 iops={1} job2 iops={2} job3 iops={3} " \
+                "job3/job2={4:.3f} job3/job1={5:.3f}".format(i, iops1, iops2, iops3, ratio1,
+                                                             ratio2))
 
         # test job1 and job2 succeeded to recalibrate
         if ratio1 < 1 or ratio1 > 3 or ratio2 < 7 or ratio2 > 13:
-            self.failure_reason = "{0} iops ratio mismatch iops1={1} iops2={2} iops3={3} expected r1~2 r2~10 got r1={4:.3f} r2={5:.3f},".format(
-                self.failure_reason, iops1, iops2, iops3, ratio1, ratio2
-            )
+            self.failure_reason += " iops ratio mismatch iops1={0} iops2={1} iops3={2} " \
+                "expected r1~2 r2~10 got r1={3:.3f} r2={4:.3f},".format(iops1, iops2, iops3,
+                                                                        ratio1, ratio2)
             self.passed = False
             return
 
@@ -478,12 +486,11 @@ class FioJobTest_t0014(FioJobTest):
             return
 
         iops_files = []
-        for i in range(1,4):
-            file_data, success = self.get_file(os.path.join(self.test_dir, "{0}_iops.{1}.log".format(os.path.basename(self.fio_job), i)))
-
-            if not success:
-                self.failure_reason = "{0} unable to open output file,".format(self.failure_reason)
-                self.passed = False
+        for i in range(1, 4):
+            filename = os.path.join(self.test_dir, "{0}_iops.{1}.log".format(os.path.basename(
+                self.fio_job), i))
+            file_data = self.get_file_fail(filename)
+            if not file_data:
                 return
 
             iops_files.append(file_data.splitlines())
@@ -501,10 +508,9 @@ class FioJobTest_t0014(FioJobTest):
 
 
                 if ratio1 < 0.43 or ratio1 > 0.57 or ratio2 < 0.21 or ratio2 > 0.45:
-                    self.failure_reason = "{0} iops ratio mismatch iops1={1} iops2={2} iops3={3}\
-                                                expected r1~0.5 r2~0.33 got r1={4:.3f} r2={5:.3f},".format(
-                        self.failure_reason, iops1, iops2, iops3, ratio1, ratio2
-                    )
+                    self.failure_reason += " iops ratio mismatch iops1={0} iops2={1} iops3={2} " \
+                                           "expected r1~0.5 r2~0.33 got r1={3:.3f} r2={4:.3f},".format(
+                                               iops1, iops2, iops3, ratio1, ratio2)
                     self.passed = False
 
             iops1 = iops1 + float(iops_files[0][i].split(',')[1])
@@ -512,17 +518,14 @@ class FioJobTest_t0014(FioJobTest):
 
             ratio1 = iops1/iops2
             ratio2 = iops1/iops3
-            logging.debug(
-                "sample {0}: job1 iops={1} job2 iops={2} job3 iops={3} job1/job2={4:.3f} job1/job3={5:.3f}".format(
-                    i, iops1, iops2, iops3, ratio1, ratio2
-                )
-            )
+            logging.debug("sample {0}: job1 iops={1} job2 iops={2} job3 iops={3} " \
+                          "job1/job2={4:.3f} job1/job3={5:.3f}".format(i, iops1, iops2, iops3,
+                                                                       ratio1, ratio2))
 
         # test job1 and job2 succeeded to recalibrate
         if ratio1 < 0.43 or ratio1 > 0.57:
-            self.failure_reason = "{0} iops ratio mismatch iops1={1} iops2={2} expected ratio~0.5 got ratio={3:.3f},".format(
-                self.failure_reason, iops1, iops2, ratio1
-            )
+            self.failure_reason += " iops ratio mismatch iops1={0} iops2={1} expected ratio~0.5 " \
+                                   "got ratio={2:.3f},".format(iops1, iops2, ratio1)
             self.passed = False
             return
 
@@ -556,7 +559,10 @@ class FioJobTest_t0019(FioJobTest):
         super(FioJobTest_t0019, self).check_result()
 
         bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
-        file_data, success = self.get_file(bw_log_filename)
+        file_data = self.get_file_fail(bw_log_filename)
+        if not file_data:
+            return
+
         log_lines = file_data.split('\n')
 
         prev = -4096
@@ -583,7 +589,10 @@ class FioJobTest_t0020(FioJobTest):
         super(FioJobTest_t0020, self).check_result()
 
         bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
-        file_data, success = self.get_file(bw_log_filename)
+        file_data = self.get_file_fail(bw_log_filename)
+        if not file_data:
+            return
+
         log_lines = file_data.split('\n')
 
         seq_count = 0
@@ -621,7 +630,10 @@ class FioJobTest_t0022(FioJobTest):
         super(FioJobTest_t0022, self).check_result()
 
         bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
-        file_data, success = self.get_file(bw_log_filename)
+        file_data = self.get_file_fail(bw_log_filename)
+        if not file_data:
+            return
+
         log_lines = file_data.split('\n')
 
         filesize = 1024*1024
@@ -646,15 +658,20 @@ class FioJobTest_t0022(FioJobTest):
 
         if len(offsets) == filesize/bs:
             self.passed = False
-            self.failure_reason += " no duplicate offsets found with norandommap=1".format(len(offsets))
+            self.failure_reason += " no duplicate offsets found with norandommap=1"
 
 
 class FioJobTest_t0023(FioJobTest):
-    """Test consists of fio test job t0023"""
+    """Test consists of fio test job t0023 randtrimwrite test."""
+
+    def check_trimwrite(self, filename):
+        """Make sure that trims are followed by writes of the same size at the same offset."""
 
-    def check_seq(self, filename):
         bw_log_filename = os.path.join(self.test_dir, filename)
-        file_data, success = self.get_file(bw_log_filename)
+        file_data = self.get_file_fail(bw_log_filename)
+        if not file_data:
+            return
+
         log_lines = file_data.split('\n')
 
         prev_ddir = 1
@@ -668,40 +685,107 @@ class FioJobTest_t0023(FioJobTest):
             if prev_ddir == 1:
                 if ddir != 2:
                     self.passed = False
-                    self.failure_reason += " {0}: write not preceeded by trim: {1}".format(bw_log_filename, line)
+                    self.failure_reason += " {0}: write not preceeded by trim: {1}".format(
+                        bw_log_filename, line)
                     break
             else:
                 if ddir != 1:
                     self.passed = False
-                    self.failure_reason += " {0}: trim not preceeded by write: {1}".format(bw_log_filename, line)
+                    self.failure_reason += " {0}: trim not preceeded by write: {1}".format(
+                        bw_log_filename, line)
                     break
                 else:
                     if prev_bs != bs:
                         self.passed = False
-                        self.failure_reason += " {0}: block size does not match: {1}".format(bw_log_filename, line)
+                        self.failure_reason += " {0}: block size does not match: {1}".format(
+                            bw_log_filename, line)
                         break
                     if prev_offset != offset:
                         self.passed = False
-                        self.failure_reason += " {0}: offset does not match: {1}".format(bw_log_filename, line)
+                        self.failure_reason += " {0}: offset does not match: {1}".format(
+                            bw_log_filename, line)
                         break
             prev_ddir = ddir
             prev_bs = bs
             prev_offset = offset
 
 
+    def check_all_offsets(self, filename, sectorsize, filesize):
+        """Make sure all offsets were touched."""
+
+        file_data = self.get_file_fail(os.path.join(self.test_dir, filename))
+        if not file_data:
+            return
+
+        log_lines = file_data.split('\n')
+
+        offsets = set()
+
+        for line in log_lines:
+            if len(line.strip()) == 0:
+                continue
+            vals = line.split(',')
+            bs = int(vals[3])
+            offset = int(vals[4])
+            if offset % sectorsize != 0:
+                self.passed = False
+                self.failure_reason += " {0}: offset {1} not a multiple of sector size {2}".format(
+                    filename, offset, sectorsize)
+                break
+            if bs % sectorsize != 0:
+                self.passed = False
+                self.failure_reason += " {0}: block size {1} not a multiple of sector size " \
+                    "{2}".format(filename, bs, sectorsize)
+                break
+            for i in range(int(bs/sectorsize)):
+                offsets.add(offset/sectorsize + i)
+
+        if len(offsets) != filesize/sectorsize:
+            self.passed = False
+            self.failure_reason += " {0}: only {1} offsets touched; expected {2}".format(
+                filename, len(offsets), filesize/sectorsize)
+        else:
+            logging.debug("%s: %d sectors touched", filename, len(offsets))
+
+
+    def check_result(self):
+        super(FioJobTest_t0023, self).check_result()
+
+        filesize = 1024*1024
+
+        self.check_trimwrite("basic_bw.log")
+        self.check_trimwrite("bs_bw.log")
+        self.check_trimwrite("bsrange_bw.log")
+        self.check_trimwrite("bssplit_bw.log")
+        self.check_trimwrite("basic_no_rm_bw.log")
+        self.check_trimwrite("bs_no_rm_bw.log")
+        self.check_trimwrite("bsrange_no_rm_bw.log")
+        self.check_trimwrite("bssplit_no_rm_bw.log")
+
+        self.check_all_offsets("basic_bw.log", 4096, filesize)
+        self.check_all_offsets("bs_bw.log", 8192, filesize)
+        self.check_all_offsets("bsrange_bw.log", 512, filesize)
+        self.check_all_offsets("bssplit_bw.log", 512, filesize)
+
+
+class FioJobTest_t0024(FioJobTest_t0023):
+    """Test consists of fio test job t0024 trimwrite test."""
+
     def check_result(self):
+        # call FioJobTest_t0023's parent to skip checks done by t0023
         super(FioJobTest_t0023, self).check_result()
 
-        self.check_seq("basic_bw.log")
-        self.check_seq("bs_bw.log")
-        self.check_seq("bsrange_bw.log")
-        self.check_seq("bssplit_bw.log")
-        self.check_seq("basic_no_rm_bw.log")
-        self.check_seq("bs_no_rm_bw.log")
-        self.check_seq("bsrange_no_rm_bw.log")
-        self.check_seq("bssplit_no_rm_bw.log")
+        filesize = 1024*1024
 
-        # TODO make sure all offsets were touched
+        self.check_trimwrite("basic_bw.log")
+        self.check_trimwrite("bs_bw.log")
+        self.check_trimwrite("bsrange_bw.log")
+        self.check_trimwrite("bssplit_bw.log")
+
+        self.check_all_offsets("basic_bw.log", 4096, filesize)
+        self.check_all_offsets("bs_bw.log", 8192, filesize)
+        self.check_all_offsets("bsrange_bw.log", 512, filesize)
+        self.check_all_offsets("bssplit_bw.log", 512, filesize)
 
 
 class FioJobTest_iops_rate(FioJobTest):
@@ -732,7 +816,7 @@ class FioJobTest_iops_rate(FioJobTest):
             self.passed = False
 
 
-class Requirements(object):
+class Requirements():
     """Requirements consists of multiple run environment characteristics.
     These are to determine if a particular test can be run"""
 
@@ -1090,6 +1174,15 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [],
     },
+    {
+        'test_id':          24,
+        'test_class':       FioJobTest_t0024,
+        'job':              't0024.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-05 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-05 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 793b868671d14f9a3e4fa76ac129545987084a8d:

  randtrimwrite: fix corner case with variable block sizes (2022-10-03 17:36:57 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 0474b83f022f1f1cc14208c05b7ccda682e01263:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-10-04 14:25:09 -0600)

----------------------------------------------------------------
Bart Van Assche (2):
      Android: Fix the build of the 'sg' engine
      Android: Enable the 'sg' engine

Jens Axboe (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 Makefile     | 3 ++-
 engines/sg.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index f947f11c..7bd572d7 100644
--- a/Makefile
+++ b/Makefile
@@ -249,7 +249,8 @@ endif
 endif
 ifeq ($(CONFIG_TARGET_OS), Android)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c profiles/tiobench.c \
-		oslib/linux-dev-lookup.c engines/io_uring.c engines/nvme.c
+		oslib/linux-dev-lookup.c engines/io_uring.c engines/nvme.c \
+		engines/sg.c
   cmdprio_SRCS = engines/cmdprio.c
 ifdef CONFIG_HAS_BLKZONED
   SOURCE += oslib/linux-blkzoned.c
diff --git a/engines/sg.c b/engines/sg.c
index 72ee07ba..24783374 100644
--- a/engines/sg.c
+++ b/engines/sg.c
@@ -1331,10 +1331,12 @@ static char *fio_sgio_errdetails(struct io_u *io_u)
 			strlcat(msg, ". ", MAXERRDETAIL);
 		}
 		if (hdr->sb_len_wr) {
+			const uint8_t *const sbp = hdr->sbp;
+
 			snprintf(msgchunk, MAXMSGCHUNK, "Sense Data (%d bytes):", hdr->sb_len_wr);
 			strlcat(msg, msgchunk, MAXERRDETAIL);
 			for (i = 0; i < hdr->sb_len_wr; i++) {
-				snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->sbp[i]);
+				snprintf(msgchunk, MAXMSGCHUNK, " %02x", sbp[i]);
 				strlcat(msg, msgchunk, MAXERRDETAIL);
 			}
 			strlcat(msg, ". ", MAXERRDETAIL);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-10-04 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-10-04 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c16dc793a3c45780f67ce65244b6e91323dee014:

  Add randtrimwrite data direction (2022-09-28 10:06:40 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 793b868671d14f9a3e4fa76ac129545987084a8d:

  randtrimwrite: fix corner case with variable block sizes (2022-10-03 17:36:57 -0400)

----------------------------------------------------------------
Anuj Gupta (1):
      engines/io_uring: add fixedbufs support for io_uring_cmd

Vincent Fu (4):
      randtrimwrite: write at same offset as trim
      test: test job for randtrimwrite
      randtrimwrite: fix offsets for corner case
      randtrimwrite: fix corner case with variable block sizes

 engines/io_uring.c |  4 +++
 io_ddir.h          |  2 ++
 io_u.c             | 34 +++++++++++++++++++++++--
 t/jobs/t0023.fio   | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 t/run-fio-tests.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100644 t/jobs/t0023.fio

---

Diff of recent changes:

diff --git a/engines/io_uring.c b/engines/io_uring.c
index d0fc61dc..c679177f 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -433,6 +433,10 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
 		ld->prepped = 0;
 		sqe->flags |= IOSQE_ASYNC;
 	}
+	if (o->fixedbufs) {
+		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
+		sqe->buf_index = io_u->index;
+	}
 
 	cmd = (struct nvme_uring_cmd *)sqe->cmd;
 	return fio_nvme_uring_cmd_prep(cmd, io_u,
diff --git a/io_ddir.h b/io_ddir.h
index 7227e9ee..217eb628 100644
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -52,6 +52,8 @@ enum td_ddir {
 #define file_randommap(td, f)	(!(td)->o.norandommap && fio_file_axmap((f)))
 #define td_trimwrite(td)	(((td)->o.td_ddir & TD_DDIR_TRIMWRITE) \
 					== TD_DDIR_TRIMWRITE)
+#define td_randtrimwrite(td)	(((td)->o.td_ddir & TD_DDIR_RANDTRIMWRITE) \
+					== TD_DDIR_RANDTRIMWRITE)
 
 static inline int ddir_sync(enum fio_ddir ddir)
 {
diff --git a/io_u.c b/io_u.c
index eec378dd..91f1a358 100644
--- a/io_u.c
+++ b/io_u.c
@@ -417,7 +417,13 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u,
 
 	b = offset = -1ULL;
 
-	if (rw_seq) {
+	if (td_randtrimwrite(td) && ddir == DDIR_WRITE) {
+		/* don't mark randommap for these writes */
+		io_u_set(td, io_u, IO_U_F_BUSY_OK);
+		offset = f->last_start[DDIR_TRIM];
+		*is_random = true;
+		ret = 0;
+	} else if (rw_seq) {
 		if (td_random(td)) {
 			if (should_do_random(td, ddir)) {
 				ret = get_next_rand_block(td, f, ddir, &b);
@@ -507,6 +513,24 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u,
 		return 1;
 	}
 
+	/*
+	 * For randtrimwrite, we decide whether to issue a trim or a write
+	 * based on whether the offsets for the most recent trim and write
+	 * operations match. If they don't match that means we just issued a
+	 * new trim and the next operation should be a write. If they *do*
+	 * match that means we just completed a trim+write pair and the next
+	 * command should be a trim.
+	 *
+	 * This works fine for sequential workloads but for random workloads
+	 * it's possible to complete a trim+write pair and then have the next
+	 * randomly generated offset match the previous offset. If that happens
+	 * we need to alter the offset for the last write operation in order
+	 * to ensure that we issue a write operation the next time through.
+	 */
+	if (td_randtrimwrite(td) && ddir == DDIR_TRIM &&
+	    f->last_start[DDIR_TRIM] == io_u->offset)
+		f->last_start[DDIR_WRITE]--;
+
 	io_u->verify_offset = io_u->offset;
 	return 0;
 }
@@ -530,6 +554,12 @@ static unsigned long long get_next_buflen(struct thread_data *td, struct io_u *i
 
 	assert(ddir_rw(ddir));
 
+	if (td_randtrimwrite(td) && ddir == DDIR_WRITE) {
+		struct fio_file *f = io_u->file;
+
+		return f->last_pos[DDIR_TRIM] - f->last_start[DDIR_TRIM];
+	}
+
 	if (td->o.bs_is_seq_rand)
 		ddir = is_random ? DDIR_WRITE : DDIR_READ;
 
@@ -768,7 +798,7 @@ static void set_rw_ddir(struct thread_data *td, struct io_u *io_u)
 
 	if (td_trimwrite(td)) {
 		struct fio_file *f = io_u->file;
-		if (f->last_pos[DDIR_WRITE] == f->last_pos[DDIR_TRIM])
+		if (f->last_start[DDIR_WRITE] == f->last_start[DDIR_TRIM])
 			ddir = DDIR_TRIM;
 		else
 			ddir = DDIR_WRITE;
diff --git a/t/jobs/t0023.fio b/t/jobs/t0023.fio
new file mode 100644
index 00000000..0250ee1a
--- /dev/null
+++ b/t/jobs/t0023.fio
@@ -0,0 +1,75 @@
+# randtrimwrite data direction tests
+[global]
+filesize=1M
+ioengine=null
+rw=randtrimwrite
+log_offset=1
+per_job_logs=0
+randrepeat=0
+stonewall
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets touched
+# 			block sizes match
+# Buggy result: 	something else
+[basic]
+write_bw_log
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes 8k for both write and trim
+# Buggy result: 	something else
+[bs]
+write_bw_log
+bs=4k,4k,8k
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bsrange]
+write_bw_log
+bsrange=512-4k
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bssplit]
+write_bw_log
+bsrange=512/25:1k:25:2k:25:4k/25
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets touched
+# 			block sizes match
+# Buggy result: 	something else
+[basic_no_rm]
+write_bw_log
+norandommap=1
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes 8k for both write and trim
+# Buggy result: 	something else
+[bs_no_rm]
+write_bw_log
+bs=4k,4k,8k
+norandommap=1
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bsrange_no_rm]
+write_bw_log
+bsrange=512-4k
+norandommap=1
+
+# Expected result: 	trim issued to random offset followed by write to same offset
+# 			all offsets trimmed
+# 			block sizes match
+# Buggy result: 	something else
+[bssplit_no_rm]
+write_bw_log
+bsrange=512/25:1k:25:2k:25:4k/25
+norandommap=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index e72fa2a0..a2b036d9 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -649,6 +649,61 @@ class FioJobTest_t0022(FioJobTest):
             self.failure_reason += " no duplicate offsets found with norandommap=1".format(len(offsets))
 
 
+class FioJobTest_t0023(FioJobTest):
+    """Test consists of fio test job t0023"""
+
+    def check_seq(self, filename):
+        bw_log_filename = os.path.join(self.test_dir, filename)
+        file_data, success = self.get_file(bw_log_filename)
+        log_lines = file_data.split('\n')
+
+        prev_ddir = 1
+        for line in log_lines:
+            if len(line.strip()) == 0:
+                continue
+            vals = line.split(',')
+            ddir = int(vals[2])
+            bs = int(vals[3])
+            offset = int(vals[4])
+            if prev_ddir == 1:
+                if ddir != 2:
+                    self.passed = False
+                    self.failure_reason += " {0}: write not preceeded by trim: {1}".format(bw_log_filename, line)
+                    break
+            else:
+                if ddir != 1:
+                    self.passed = False
+                    self.failure_reason += " {0}: trim not preceeded by write: {1}".format(bw_log_filename, line)
+                    break
+                else:
+                    if prev_bs != bs:
+                        self.passed = False
+                        self.failure_reason += " {0}: block size does not match: {1}".format(bw_log_filename, line)
+                        break
+                    if prev_offset != offset:
+                        self.passed = False
+                        self.failure_reason += " {0}: offset does not match: {1}".format(bw_log_filename, line)
+                        break
+            prev_ddir = ddir
+            prev_bs = bs
+            prev_offset = offset
+
+
+    def check_result(self):
+        super(FioJobTest_t0023, self).check_result()
+
+        self.check_seq("basic_bw.log")
+        self.check_seq("bs_bw.log")
+        self.check_seq("bsrange_bw.log")
+        self.check_seq("bssplit_bw.log")
+        self.check_seq("basic_no_rm_bw.log")
+        self.check_seq("bs_no_rm_bw.log")
+        self.check_seq("bsrange_no_rm_bw.log")
+        self.check_seq("bssplit_no_rm_bw.log")
+
+        # TODO make sure all offsets were touched
+
+
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
     Confirm that job0 iops == 1000
@@ -1026,6 +1081,15 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [],
     },
+    {
+        'test_id':          23,
+        'test_class':       FioJobTest_t0023,
+        'job':              't0023.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-29 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-29 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6112c0f5a86c6b437e7158ab40a6e9384ce95e85:

  doc: build manpage from fio_doc.rst instead of fio_man.rst (2022-09-27 11:58:25 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c16dc793a3c45780f67ce65244b6e91323dee014:

  Add randtrimwrite data direction (2022-09-28 10:06:40 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      t/io_uring: get rid of useless read barriers
      Add randtrimwrite data direction

 HOWTO.rst    |  3 +++
 fio.1        |  5 +++++
 io_ddir.h    |  4 +++-
 options.c    |  4 ++++
 t/io_uring.c | 10 ++++++----
 5 files changed, 21 insertions(+), 5 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 924f5ed9..e89d05f0 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1134,6 +1134,9 @@ I/O type
 				write 64K bytes on the same trimmed blocks. This behaviour
 				will be consistent with ``number_ios`` or other Fio options
 				limiting the total bytes or number of I/O's.
+		**randtrimwrite**
+				Like trimwrite, but uses random offsets rather
+				than sequential offsets.
 
 	Fio defaults to read if the option is not specified.  For the mixed I/O
 	types, the default is to split them 50/50.  For certain types of I/O the
diff --git a/fio.1 b/fio.1
index 39d6b4f4..4324a975 100644
--- a/fio.1
+++ b/fio.1
@@ -904,6 +904,11 @@ then the same blocks will be written to. So if `io_size=64K' is specified,
 Fio will trim a total of 64K bytes and also write 64K bytes on the same
 trimmed blocks. This behaviour will be consistent with `number_ios' or
 other Fio options limiting the total bytes or number of I/O's.
+.TP
+.B randtrimwrite
+Like
+.BR trimwrite ,
+but uses random offsets rather than sequential offsets.
 .RE
 .P
 Fio defaults to read if the option is not specified. For the mixed I/O
diff --git a/io_ddir.h b/io_ddir.h
index 296a9d04..7227e9ee 100644
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -41,6 +41,7 @@ enum td_ddir {
 	TD_DDIR_RANDRW		= TD_DDIR_RW | TD_DDIR_RAND,
 	TD_DDIR_RANDTRIM	= TD_DDIR_TRIM | TD_DDIR_RAND,
 	TD_DDIR_TRIMWRITE	= TD_DDIR_TRIM | TD_DDIR_WRITE,
+	TD_DDIR_RANDTRIMWRITE	= TD_DDIR_RANDTRIM | TD_DDIR_WRITE,
 };
 
 #define td_read(td)		((td)->o.td_ddir & TD_DDIR_READ)
@@ -67,7 +68,8 @@ static inline const char *ddir_str(enum td_ddir ddir)
 {
 	static const char *__str[] = { NULL, "read", "write", "rw", "rand",
 				"randread", "randwrite", "randrw",
-				"trim", NULL, "trimwrite", NULL, "randtrim" };
+				"trim", NULL, "trimwrite", NULL, "randtrim",
+				NULL, "randtrimwrite" };
 
 	return __str[ddir];
 }
diff --git a/options.c b/options.c
index 5d3daedf..a668b0e4 100644
--- a/options.c
+++ b/options.c
@@ -1947,6 +1947,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 			    .oval = TD_DDIR_TRIMWRITE,
 			    .help = "Trim and write mix, trims preceding writes"
 			  },
+			  { .ival = "randtrimwrite",
+			    .oval = TD_DDIR_RANDTRIMWRITE,
+			    .help = "Randomly trim and write mix, trims preceding writes"
+			  },
 		},
 	},
 	{
diff --git a/t/io_uring.c b/t/io_uring.c
index b9353ac8..edbacee3 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -661,8 +661,12 @@ static void init_io_pt(struct submitter *s, unsigned index)
 static int prep_more_ios_uring(struct submitter *s, int max_ios)
 {
 	struct io_sq_ring *ring = &s->sq_ring;
-	unsigned index, tail, next_tail, prepped = 0;
-	unsigned int head = atomic_load_acquire(ring->head);
+	unsigned head, index, tail, next_tail, prepped = 0;
+
+	if (sq_thread_poll)
+		head = atomic_load_acquire(ring->head);
+	else
+		head = *ring->head;
 
 	next_tail = tail = *ring->tail;
 	do {
@@ -741,7 +745,6 @@ static int reap_events_uring(struct submitter *s)
 	do {
 		struct file *f;
 
-		read_barrier();
 		if (head == atomic_load_acquire(ring->tail))
 			break;
 		cqe = &ring->cqes[head & cq_ring_mask];
@@ -796,7 +799,6 @@ static int reap_events_uring_pt(struct submitter *s)
 	do {
 		struct file *f;
 
-		read_barrier();
 		if (head == atomic_load_acquire(ring->tail))
 			break;
 		index = head & cq_ring_mask;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-23 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-23 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d14687025c0c61d047e4252036d1b024d62cb0a6:

  configure: change grep -P to grep -E (2022-09-19 09:42:14 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 0574e8c3b2b47e1e2564c2f50ea0b6f2629f2e48:

  arm64: ensure CPU clock retrieval issues isb() (2022-09-22 10:03:51 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      arm64: ensure CPU clock retrieval issues isb()

 arch/arch-aarch64.h | 3 +++
 1 file changed, 3 insertions(+)

---

Diff of recent changes:

diff --git a/arch/arch-aarch64.h b/arch/arch-aarch64.h
index 951d1718..919e5796 100644
--- a/arch/arch-aarch64.h
+++ b/arch/arch-aarch64.h
@@ -27,10 +27,13 @@ static inline int arch_ffz(unsigned long bitmask)
 
 #define ARCH_HAVE_FFZ
 
+#define isb()	asm volatile("isb" : : : "memory")
+
 static inline unsigned long long get_cpu_clock(void)
 {
 	unsigned long val;
 
+	isb();
 	asm volatile("mrs %0, cntvct_el0" : "=r" (val));
 	return val;
 }

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-20 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-20 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 5932cf0f2a03396b5f3f0b4667f5e66f7d8477e5:

  Merge branch 'fix-example-disk-zone-profile' of github.com:cvubrugier/fio (2022-09-15 11:02:49 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d14687025c0c61d047e4252036d1b024d62cb0a6:

  configure: change grep -P to grep -E (2022-09-19 09:42:14 -0400)

----------------------------------------------------------------
Vincent Fu (2):
      gettime: cleanups
      configure: change grep -P to grep -E

 configure | 2 +-
 gettime.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 7741ef4f..546541a2 100755
--- a/configure
+++ b/configure
@@ -117,7 +117,7 @@ has() {
 }
 
 num() {
-  echo "$1" | grep -P -q "^[0-9]+$"
+  echo "$1" | grep -E -q "^[0-9]+$"
 }
 
 check_define() {
diff --git a/gettime.c b/gettime.c
index 14462420..8993be16 100644
--- a/gettime.c
+++ b/gettime.c
@@ -313,7 +313,7 @@ static int calibrate_cpu_clock(void)
 
 	max_ticks = MAX_CLOCK_SEC * cycles_per_msec * 1000ULL;
 	max_mult = ULLONG_MAX / max_ticks;
-	dprint(FD_TIME, "\n\nmax_ticks=%llu, __builtin_clzll=%d, "
+	dprint(FD_TIME, "max_ticks=%llu, __builtin_clzll=%d, "
 			"max_mult=%llu\n", max_ticks,
 			__builtin_clzll(max_ticks), max_mult);
 
@@ -335,7 +335,7 @@ static int calibrate_cpu_clock(void)
 
 	/*
 	 * Find the greatest power of 2 clock ticks that is less than the
-	 * ticks in MAX_CLOCK_SEC_2STAGE
+	 * ticks in MAX_CLOCK_SEC
 	 */
 	max_cycles_shift = max_cycles_mask = 0;
 	tmp = MAX_CLOCK_SEC * 1000ULL * cycles_per_msec;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-16 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-16 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 08996af41b2566565cbcdee71766030a2c8ba377:

  backend: number of ios not as expected for trimwrite (2022-09-13 15:03:21 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5932cf0f2a03396b5f3f0b4667f5e66f7d8477e5:

  Merge branch 'fix-example-disk-zone-profile' of github.com:cvubrugier/fio (2022-09-15 11:02:49 -0400)

----------------------------------------------------------------
Christophe Vu-Brugier (2):
      examples: set zonemode to strided in disk-zone-profile.fio
      examples: fix bandwidth logs generation in disk-zone-profile.fio

Vincent Fu (2):
      Merge branch 'master' of github.com:uniontech-lilinjie/fio
      Merge branch 'fix-example-disk-zone-profile' of github.com:cvubrugier/fio

lilinjie (1):
      fix spelling error

 examples/disk-zone-profile.fio | 9 ++++++---
 fio.1                          | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

---

Diff of recent changes:

diff --git a/examples/disk-zone-profile.fio b/examples/disk-zone-profile.fio
index 96e56695..577820eb 100644
--- a/examples/disk-zone-profile.fio
+++ b/examples/disk-zone-profile.fio
@@ -1,4 +1,4 @@
-; Read disk in zones of 128m/2g, generating a plot of that afterwards
+; Read disk in zones of 256m/2g. Generating a plot of that afterwards
 ; should give a nice picture of the zoning of this drive
 
 [global]
@@ -7,8 +7,11 @@ direct=1
 rw=read
 ioengine=libaio
 iodepth=2
+zonemode=strided
 zonesize=256m
 zoneskip=2g
-write_bw_log
 
-[/dev/sdb]
+[disk-zone-profile]
+filename=/dev/sdb
+write_bw_log
+log_offset=1
diff --git a/fio.1 b/fio.1
index c67bd464..39d6b4f4 100644
--- a/fio.1
+++ b/fio.1
@@ -2491,11 +2491,11 @@ Specify the label or UUID of the DAOS pool to connect to.
 Specify the label or UUID of the DAOS container to open.
 .TP
 .BI (dfs)chunk_size
-Specificy a different chunk size (in bytes) for the dfs file.
+Specify a different chunk size (in bytes) for the dfs file.
 Use DAOS container's chunk size by default.
 .TP
 .BI (dfs)object_class
-Specificy a different object class for the dfs file.
+Specify a different object class for the dfs file.
 Use DAOS container's object class by default.
 .TP
 .BI (nfs)nfs_url

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-14 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-14 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 53c82bb879532b994451c6abc7be80c94241d03b:

  stat: fix comment about memory consumption (2022-09-12 10:45:56 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 08996af41b2566565cbcdee71766030a2c8ba377:

  backend: number of ios not as expected for trimwrite (2022-09-13 15:03:21 -0600)

----------------------------------------------------------------
Ankit Kumar (1):
      backend: number of ios not as expected for trimwrite

 HOWTO.rst | 6 +++++-
 backend.c | 6 ++++--
 fio.1     | 5 ++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 2c6c6dbe..924f5ed9 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1129,7 +1129,11 @@ I/O type
 				Random mixed reads and writes.
 		**trimwrite**
 				Sequential trim+write sequences. Blocks will be trimmed first,
-				then the same blocks will be written to.
+				then the same blocks will be written to. So if ``io_size=64K``
+				is specified, Fio will trim a total of 64K bytes and also
+				write 64K bytes on the same trimmed blocks. This behaviour
+				will be consistent with ``number_ios`` or other Fio options
+				limiting the total bytes or number of I/O's.
 
 	Fio defaults to read if the option is not specified.  For the mixed I/O
 	types, the default is to split them 50/50.  For certain types of I/O the
diff --git a/backend.c b/backend.c
index fe614f6e..ec535bcc 100644
--- a/backend.c
+++ b/backend.c
@@ -971,9 +971,11 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
 		total_bytes += td->o.size;
 
 	/* In trimwrite mode, each byte is trimmed and then written, so
-	 * allow total_bytes to be twice as big */
-	if (td_trimwrite(td))
+	 * allow total_bytes or number of ios to be twice as big */
+	if (td_trimwrite(td)) {
 		total_bytes += td->total_io_size;
+		td->o.number_ios *= 2;
+	}
 
 	while ((td->o.read_iolog_file && !flist_empty(&td->io_log_list)) ||
 		(!flist_empty(&td->trim_list)) || !io_issue_bytes_exceeded(td) ||
diff --git a/fio.1 b/fio.1
index 67d7c710..c67bd464 100644
--- a/fio.1
+++ b/fio.1
@@ -900,7 +900,10 @@ Random mixed reads and writes.
 .TP
 .B trimwrite
 Sequential trim+write sequences. Blocks will be trimmed first,
-then the same blocks will be written to.
+then the same blocks will be written to. So if `io_size=64K' is specified,
+Fio will trim a total of 64K bytes and also write 64K bytes on the same
+trimmed blocks. This behaviour will be consistent with `number_ios' or
+other Fio options limiting the total bytes or number of I/O's.
 .RE
 .P
 Fio defaults to read if the option is not specified. For the mixed I/O

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-13 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-13 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 10fc06dc4166ef7c69a6c06cb3a318878048f6be:

  Merge branch 'rpma-add-support-for-libpmem2-to-the-librpma-engine' of https://github.com/ldorau/fio (2022-09-06 06:58:48 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 53c82bb879532b994451c6abc7be80c94241d03b:

  stat: fix comment about memory consumption (2022-09-12 10:45:56 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      stat: fix comment about memory consumption

 stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/stat.h b/stat.h
index eb7845af..4c3bf71f 100644
--- a/stat.h
+++ b/stat.h
@@ -51,7 +51,7 @@ struct group_run_stats {
  *
  * FIO_IO_U_PLAT_GROUP_NR and FIO_IO_U_PLAT_BITS determine the memory
  * requirement of storing those aggregate counts. The memory used will
- * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(int)
+ * be (FIO_IO_U_PLAT_GROUP_NR * 2^FIO_IO_U_PLAT_BITS) * sizeof(uint64_t)
  * bytes.
  *
  * FIO_IO_U_PLAT_NR is the total number of buckets.

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-07 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-07 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 021ce718f5ae4bfd5f4e42290993578adb7c7bd5:

  t/io_uring: enable support for registered buffers for passthrough (2022-09-03 11:04:06 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 10fc06dc4166ef7c69a6c06cb3a318878048f6be:

  Merge branch 'rpma-add-support-for-libpmem2-to-the-librpma-engine' of https://github.com/ldorau/fio (2022-09-06 06:58:48 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      Merge branch 'ci-build-the-librpma-fio-engine' of https://github.com/ldorau/fio
      Merge branch 'rpma-add-support-for-libpmem2-to-the-librpma-engine' of https://github.com/ldorau/fio

Kacper Stefanski (2):
      rpma: add support for libpmem2 to librpma engine in APM mode
      rpma: add support for libpmem2 to librpma engine in GPSPM mode

Lukasz Dorau (3):
      rpma: simplify server_cmpl_process()
      ci: build the librpma fio engine
      ci: remove the unused travis-install-pmdk.sh file

 Makefile                                           | 12 ++-
 ...stall-librpma.sh => actions-install-librpma.sh} |  3 +-
 ci/actions-install.sh                              |  6 ++
 ci/travis-install-pmdk.sh                          | 29 -------
 configure                                          | 29 ++++++-
 engines/librpma_fio.c                              | 52 ++++---------
 engines/librpma_fio.h                              |  7 +-
 engines/librpma_fio_pmem.h                         | 67 ++++++++++++++++
 engines/librpma_fio_pmem2.h                        | 91 ++++++++++++++++++++++
 engines/librpma_gpspm.c                            | 59 ++++++++------
 10 files changed, 257 insertions(+), 98 deletions(-)
 rename ci/{travis-install-librpma.sh => actions-install-librpma.sh} (74%)
 delete mode 100755 ci/travis-install-pmdk.sh
 create mode 100644 engines/librpma_fio_pmem.h
 create mode 100644 engines/librpma_fio_pmem2.h

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 634d2c93..f947f11c 100644
--- a/Makefile
+++ b/Makefile
@@ -111,13 +111,21 @@ endif
 ifdef CONFIG_LIBRPMA_APM
   librpma_apm_SRCS = engines/librpma_apm.c
   librpma_fio_SRCS = engines/librpma_fio.c
-  librpma_apm_LIBS = -lrpma -lpmem
+  ifdef CONFIG_LIBPMEM2_INSTALLED
+    librpma_apm_LIBS = -lrpma -lpmem2
+  else
+    librpma_apm_LIBS = -lrpma -lpmem
+  endif
   ENGINES += librpma_apm
 endif
 ifdef CONFIG_LIBRPMA_GPSPM
   librpma_gpspm_SRCS = engines/librpma_gpspm.c engines/librpma_gpspm_flush.pb-c.c
   librpma_fio_SRCS = engines/librpma_fio.c
-  librpma_gpspm_LIBS = -lrpma -lpmem -lprotobuf-c
+  ifdef CONFIG_LIBPMEM2_INSTALLED
+    librpma_gpspm_LIBS = -lrpma -lpmem2 -lprotobuf-c
+  else
+    librpma_gpspm_LIBS = -lrpma -lpmem -lprotobuf-c
+  endif
   ENGINES += librpma_gpspm
 endif
 ifdef librpma_fio_SRCS
diff --git a/ci/travis-install-librpma.sh b/ci/actions-install-librpma.sh
similarity index 74%
rename from ci/travis-install-librpma.sh
rename to ci/actions-install-librpma.sh
index 4e5ed21d..31f9f712 100755
--- a/ci/travis-install-librpma.sh
+++ b/ci/actions-install-librpma.sh
@@ -1,7 +1,6 @@
 #!/bin/bash -e
 
-# 11.02.2021 Merge pull request #866 from ldorau/rpma-mmap-memory-for-rpma_mr_reg-in-rpma_flush_apm_new
-LIBRPMA_VERSION=fbac593917e98f3f26abf14f4fad5a832b330f5c
+LIBRPMA_VERSION="1.0.0"
 ZIP_FILE=rpma.zip
 
 WORKDIR=$(pwd)
diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index c209a089..82e14d2a 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -44,7 +44,9 @@ DPKGCFG
                 libiscsi-dev
                 libnbd-dev
                 libpmem-dev
+                libpmem2-dev
                 libpmemblk-dev
+                libprotobuf-c-dev
                 librbd-dev
                 libtcmalloc-minimal4
                 nvidia-cuda-dev
@@ -67,6 +69,10 @@ DPKGCFG
     sudo apt-get -qq update
     echo "Installing packages... ${pkgs[@]}"
     sudo apt-get install -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
+    if [ "${CI_TARGET_ARCH}" == "x86_64" ]; then
+        # install librpma from sources
+        ci/actions-install-librpma.sh
+    fi
 }
 
 install_linux() {
diff --git a/ci/travis-install-pmdk.sh b/ci/travis-install-pmdk.sh
deleted file mode 100755
index 7bde9fd0..00000000
--- a/ci/travis-install-pmdk.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash -e
-
-# pmdk v1.9.1 release
-PMDK_VERSION=1.9.1
-
-WORKDIR=$(pwd)
-
-#
-# The '/bin/sh' shell used by PMDK's 'make install'
-# does not know the exact localization of clang
-# and fails with:
-#    /bin/sh: 1: clang: not found
-# if CC is not set to the full path of clang.
-#
-CC=$(type -P "$CC")
-export CC
-
-# Install PMDK libraries, because PMDK's libpmem
-# is a dependency of the librpma fio engine.
-# Install it from a release package
-# with already generated documentation,
-# in order to not install 'pandoc'.
-wget https://github.com/pmem/pmdk/releases/download/${PMDK_VERSION}/pmdk-${PMDK_VERSION}.tar.gz
-tar -xzf pmdk-${PMDK_VERSION}.tar.gz
-cd pmdk-${PMDK_VERSION}
-make -j"$(nproc)" NDCTL_ENABLE=n
-sudo make -j"$(nproc)" install prefix=/usr NDCTL_ENABLE=n
-cd "$WORKDIR"
-rm -rf pmdk-${PMDK_VERSION}
diff --git a/configure b/configure
index a2b9bd4c..7741ef4f 100755
--- a/configure
+++ b/configure
@@ -2201,6 +2201,26 @@ EOF
 fi
 print_config "libpmem1_5" "$libpmem1_5"
 
+##########################################
+# Check whether we have libpmem2
+if test "$libpmem2" != "yes" ; then
+  libpmem2="no"
+fi
+cat > $TMPC << EOF
+#include <libpmem2.h>
+int main(int argc, char **argv)
+{
+  struct pmem2_config *cfg;
+  pmem2_config_new(&cfg);
+  pmem2_config_delete(&cfg);
+  return 0;
+}
+EOF
+if compile_prog "" "-lpmem2" "libpmem2"; then
+  libpmem2="yes"
+fi
+print_config "libpmem2" "$libpmem2"
+
 ##########################################
 # Check whether we have libpmemblk
 # libpmem is a prerequisite
@@ -2990,11 +3010,13 @@ if test "$libverbs" = "yes" -a "$rdmacm" = "yes" ; then
 fi
 # librpma is supported on the 'x86_64' architecture for now
 if test "$cpu" = "x86_64" -a "$libverbs" = "yes" -a "$rdmacm" = "yes" \
-    -a "$librpma" = "yes" -a "$libpmem" = "yes" ; then
+    -a "$librpma" = "yes" \
+    && test "$libpmem" = "yes" -o "$libpmem2" = "yes" ; then
   output_sym "CONFIG_LIBRPMA_APM"
 fi
 if test "$cpu" = "x86_64" -a "$libverbs" = "yes" -a "$rdmacm" = "yes" \
-    -a "$librpma" = "yes" -a "$libpmem" = "yes" -a "$libprotobuf_c" = "yes" ; then
+    -a "$librpma" = "yes" -a "$libprotobuf_c" = "yes" \
+    && test "$libpmem" = "yes" -o "$libpmem2" = "yes" ; then
   output_sym "CONFIG_LIBRPMA_GPSPM"
 fi
 if test "$clock_gettime" = "yes" ; then
@@ -3138,6 +3160,9 @@ fi
 if test "$pmem" = "yes" ; then
   output_sym "CONFIG_LIBPMEM"
 fi
+if test "$libpmem2" = "yes" ; then
+  output_sym "CONFIG_LIBPMEM2_INSTALLED"
+fi
 if test "$libime" = "yes" ; then
   output_sym "CONFIG_IME"
 fi
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index a78a1e57..42d6163e 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -1,7 +1,7 @@
 /*
  * librpma_fio: librpma_apm and librpma_gpspm engines' common part.
  *
- * Copyright 2021, Intel Corporation
+ * Copyright 2021-2022, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License,
@@ -13,9 +13,11 @@
  * GNU General Public License for more details.
  */
 
-#include "librpma_fio.h"
-
-#include <libpmem.h>
+#ifdef CONFIG_LIBPMEM2_INSTALLED
+#include "librpma_fio_pmem2.h"
+#else
+#include "librpma_fio_pmem.h"
+#endif /* CONFIG_LIBPMEM2_INSTALLED */
 
 struct fio_option librpma_fio_options[] = {
 	{
@@ -111,10 +113,8 @@ char *librpma_fio_allocate_dram(struct thread_data *td, size_t size,
 char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
 		size_t size, struct librpma_fio_mem *mem)
 {
-	size_t size_mmap = 0;
-	char *mem_ptr = NULL;
-	int is_pmem = 0;
 	size_t ws_offset;
+	mem->mem_ptr = NULL;
 
 	if (size % page_size) {
 		log_err("fio: size (%zu) is not aligned to page size (%zu)\n",
@@ -135,48 +135,24 @@ char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
 		return NULL;
 	}
 
-	/* map the file */
-	mem_ptr = pmem_map_file(f->file_name, 0 /* len */, 0 /* flags */,
-			0 /* mode */, &size_mmap, &is_pmem);
-	if (mem_ptr == NULL) {
-		log_err("fio: pmem_map_file(%s) failed\n", f->file_name);
-		/* pmem_map_file() sets errno on failure */
-		td_verror(td, errno, "pmem_map_file");
-		return NULL;
-	}
-
-	/* pmem is expected */
-	if (!is_pmem) {
-		log_err("fio: %s is not located in persistent memory\n",
+	if (librpma_fio_pmem_map_file(f, size, mem, ws_offset)) {
+		log_err("fio: librpma_fio_pmem_map_file(%s) failed\n",
 			f->file_name);
-		goto err_unmap;
-	}
-
-	/* check size of allocated persistent memory */
-	if (size_mmap < ws_offset + size) {
-		log_err(
-			"fio: %s is too small to handle so many threads (%zu < %zu)\n",
-			f->file_name, size_mmap, ws_offset + size);
-		goto err_unmap;
+		return NULL;
 	}
 
 	log_info("fio: size of memory mapped from the file %s: %zu\n",
-		f->file_name, size_mmap);
-
-	mem->mem_ptr = mem_ptr;
-	mem->size_mmap = size_mmap;
+		f->file_name, mem->size_mmap);
 
-	return mem_ptr + ws_offset;
+	log_info("fio: library used to map PMem from file: %s\n", RPMA_PMEM_USED);
 
-err_unmap:
-	(void) pmem_unmap(mem_ptr, size_mmap);
-	return NULL;
+	return mem->mem_ptr ? mem->mem_ptr + ws_offset : NULL;
 }
 
 void librpma_fio_free(struct librpma_fio_mem *mem)
 {
 	if (mem->size_mmap)
-		(void) pmem_unmap(mem->mem_ptr, mem->size_mmap);
+		librpma_fio_unmap(mem);
 	else
 		free(mem->mem_ptr);
 }
diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h
index 91290235..480ded1b 100644
--- a/engines/librpma_fio.h
+++ b/engines/librpma_fio.h
@@ -1,7 +1,7 @@
 /*
  * librpma_fio: librpma_apm and librpma_gpspm engines' common header.
  *
- * Copyright 2021, Intel Corporation
+ * Copyright 2021-2022, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License,
@@ -72,6 +72,11 @@ struct librpma_fio_mem {
 
 	/* size of the mapped persistent memory */
 	size_t size_mmap;
+
+#ifdef CONFIG_LIBPMEM2_INSTALLED
+	/* libpmem2 structure used for mapping PMem */
+	struct pmem2_map *map;
+#endif
 };
 
 char *librpma_fio_allocate_dram(struct thread_data *td, size_t size,
diff --git a/engines/librpma_fio_pmem.h b/engines/librpma_fio_pmem.h
new file mode 100644
index 00000000..4854292c
--- /dev/null
+++ b/engines/librpma_fio_pmem.h
@@ -0,0 +1,67 @@
+/*
+ * librpma_fio_pmem: allocates pmem using libpmem.
+ *
+ * Copyright 2022, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License,
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <libpmem.h>
+#include "librpma_fio.h"
+
+#define RPMA_PMEM_USED "libpmem"
+
+static int librpma_fio_pmem_map_file(struct fio_file *f, size_t size,
+		struct librpma_fio_mem *mem, size_t ws_offset)
+{
+	int is_pmem = 0;
+	size_t size_mmap = 0;
+
+	/* map the file */
+	mem->mem_ptr = pmem_map_file(f->file_name, 0 /* len */, 0 /* flags */,
+			0 /* mode */, &size_mmap, &is_pmem);
+	if (mem->mem_ptr == NULL) {
+		/* pmem_map_file() sets errno on failure */
+		log_err("fio: pmem_map_file(%s) failed: %s (errno %i)\n",
+			f->file_name, strerror(errno), errno);
+		return -1;
+	}
+
+	/* pmem is expected */
+	if (!is_pmem) {
+		log_err("fio: %s is not located in persistent memory\n",
+			f->file_name);
+		goto err_unmap;
+	}
+
+	/* check size of allocated persistent memory */
+	if (size_mmap < ws_offset + size) {
+		log_err(
+			"fio: %s is too small to handle so many threads (%zu < %zu)\n",
+			f->file_name, size_mmap, ws_offset + size);
+		goto err_unmap;
+	}
+
+	log_info("fio: size of memory mapped from the file %s: %zu\n",
+		f->file_name, size_mmap);
+
+	mem->size_mmap = size_mmap;
+
+	return 0;
+
+err_unmap:
+	(void) pmem_unmap(mem->mem_ptr, size_mmap);
+	return -1;
+}
+
+static inline void librpma_fio_unmap(struct librpma_fio_mem *mem)
+{
+	(void) pmem_unmap(mem->mem_ptr, mem->size_mmap);
+}
diff --git a/engines/librpma_fio_pmem2.h b/engines/librpma_fio_pmem2.h
new file mode 100644
index 00000000..09a51f5f
--- /dev/null
+++ b/engines/librpma_fio_pmem2.h
@@ -0,0 +1,91 @@
+/*
+ * librpma_fio_pmem2: allocates pmem using libpmem2.
+ *
+ * Copyright 2022, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License,
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <libpmem2.h>
+#include "librpma_fio.h"
+
+#define RPMA_PMEM_USED "libpmem2"
+
+static int librpma_fio_pmem_map_file(struct fio_file *f, size_t size,
+		struct librpma_fio_mem *mem, size_t ws_offset)
+{
+	int fd;
+	struct pmem2_config *cfg = NULL;
+	struct pmem2_map *map = NULL;
+	struct pmem2_source *src = NULL;
+
+	size_t size_mmap;
+
+	if((fd = open(f->file_name, O_RDWR)) < 0) {
+		log_err("fio: cannot open fio file\n");
+		return -1;
+	}
+
+	if (pmem2_source_from_fd(&src, fd) != 0) {
+		log_err("fio: pmem2_source_from_fd() failed\n");
+		goto err_close;
+	}
+
+	if (pmem2_config_new(&cfg) != 0) {
+		log_err("fio: pmem2_config_new() failed\n");
+		goto err_source_delete;
+	}
+
+	if (pmem2_config_set_required_store_granularity(cfg,
+					PMEM2_GRANULARITY_CACHE_LINE) != 0) {
+		log_err("fio: pmem2_config_set_required_store_granularity() failed: %s\n", pmem2_errormsg());
+		goto err_config_delete;
+	}
+
+	if (pmem2_map_new(&map, cfg, src) != 0) {
+		log_err("fio: pmem2_map_new(%s) failed: %s\n", f->file_name, pmem2_errormsg());
+		goto err_config_delete;
+	}
+
+	size_mmap = pmem2_map_get_size(map);
+
+	/* check size of allocated persistent memory */
+	if (size_mmap < ws_offset + size) {
+		log_err(
+			"fio: %s is too small to handle so many threads (%zu < %zu)\n",
+			f->file_name, size_mmap, ws_offset + size);
+		goto err_map_delete;
+	}
+
+	mem->mem_ptr = pmem2_map_get_address(map);
+	mem->size_mmap = size_mmap;
+	mem->map = map;
+	pmem2_config_delete(&cfg);
+	pmem2_source_delete(&src);
+	close(fd);
+
+	return 0;
+
+err_map_delete:
+	pmem2_map_delete(&map);
+err_config_delete:
+	pmem2_config_delete(&cfg);
+err_source_delete:
+	pmem2_source_delete(&src);
+err_close:
+	close(fd);
+
+	return -1;
+}
+
+static inline void librpma_fio_unmap(struct librpma_fio_mem *mem)
+{
+	(void) pmem2_map_delete(&mem->map);
+}
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index f00717a7..70116d0d 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -2,7 +2,7 @@
  * librpma_gpspm: IO engine that uses PMDK librpma to write data,
  *		based on General Purpose Server Persistency Method
  *
- * Copyright 2020-2021, Intel Corporation
+ * Copyright 2020-2022, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License,
@@ -16,7 +16,11 @@
 
 #include "librpma_fio.h"
 
+#ifdef CONFIG_LIBPMEM2_INSTALLED
+#include <libpmem2.h>
+#else
 #include <libpmem.h>
+#endif
 
 /* Generated by the protocol buffer compiler from: librpma_gpspm_flush.proto */
 #include "librpma_gpspm_flush.pb-c.h"
@@ -361,6 +365,8 @@ FIO_STATIC struct ioengine_ops ioengine_client = {
 
 #define IO_U_BUFF_OFF_SERVER(i) (i * IO_U_BUF_LEN)
 
+typedef void (*librpma_fio_persist_fn)(const void *ptr, size_t size);
+
 struct server_data {
 	/* aligned td->orig_buffer */
 	char *orig_buffer_aligned;
@@ -373,6 +379,8 @@ struct server_data {
 	/* in-memory queues */
 	struct ibv_wc *msgs_queued;
 	uint32_t msg_queued_nr;
+
+	librpma_fio_persist_fn persist;
 };
 
 static int server_init(struct thread_data *td)
@@ -400,6 +408,13 @@ static int server_init(struct thread_data *td)
 		goto err_free_sd;
 	}
 
+#ifdef CONFIG_LIBPMEM2_INSTALLED
+	/* get libpmem2 persist function from pmem2_map */
+	sd->persist = pmem2_get_persist_fn(csd->mem.map);
+#else
+	sd->persist = pmem_persist;
+#endif
+
 	/*
 	 * Assure a single io_u buffer can store both SEND and RECV messages and
 	 * an io_us buffer allocation is page-size-aligned which is required
@@ -594,7 +609,7 @@ static int server_qe_process(struct thread_data *td, struct ibv_wc *wc)
 
 	if (IS_NOT_THE_LAST_MESSAGE(flush_req)) {
 		op_ptr = csd->ws_ptr + flush_req->offset;
-		pmem_persist(op_ptr, flush_req->length);
+		sd->persist(op_ptr, flush_req->length);
 	} else {
 		/*
 		 * This is the last message - the client is done.
@@ -685,29 +700,25 @@ static int server_cmpl_process(struct thread_data *td)
 
 	ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
 	if (ret == RPMA_E_NO_COMPLETION) {
-		if (o->busy_wait_polling == 0) {
-			ret = rpma_cq_wait(csd->cq);
-			if (ret == RPMA_E_NO_COMPLETION) {
-				/* lack of completion is not an error */
-				return 0;
-			} else if (ret != 0) {
-				librpma_td_verror(td, ret, "rpma_cq_wait");
-				goto err_terminate;
-			}
-
-			ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
-			if (ret == RPMA_E_NO_COMPLETION) {
-				/* lack of completion is not an error */
-				return 0;
-			} else if (ret != 0) {
-				librpma_td_verror(td, ret, "rpma_cq_get_wc");
-				goto err_terminate;
-			}
-		} else {
-			/* lack of completion is not an error */
-			return 0;
+		if (o->busy_wait_polling)
+			return 0; /* lack of completion is not an error */
+
+		ret = rpma_cq_wait(csd->cq);
+		if (ret == RPMA_E_NO_COMPLETION)
+			return 0; /* lack of completion is not an error */
+		if (ret) {
+			librpma_td_verror(td, ret, "rpma_cq_wait");
+			goto err_terminate;
+		}
+
+		ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
+		if (ret == RPMA_E_NO_COMPLETION)
+			return 0; /* lack of completion is not an error */
+		if (ret) {
+			librpma_td_verror(td, ret, "rpma_cq_get_wc");
+			goto err_terminate;
 		}
-	} else if (ret != 0) {
+	} else if (ret) {
 		librpma_td_verror(td, ret, "rpma_cq_get_wc");
 		goto err_terminate;
 	}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-04 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-04 12:00 UTC (permalink / raw)
  To: fio

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 4952 bytes --]

The following changes since commit 0b2c736174402afc742a7ed97c37f872fa93ee25:

  Merge branch 'fiopr_windows_log_compression_storage_fixes' of https://github.com/PCPartPicker/fio (2022-09-02 17:29:45 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 021ce718f5ae4bfd5f4e42290993578adb7c7bd5:

  t/io_uring: enable support for registered buffers for passthrough (2022-09-03 11:04:06 -0600)

----------------------------------------------------------------
Jens Axboe (5):
      Merge branch 'fix/help-terse-version-5' of https://github.com/scop/fio
      Merge branch 'doc/showcmd-usage' of https://github.com/scop/fio
      Merge branch 'fix/howto-spelling' of https://github.com/scop/fio
      t/io_uring: properly detect numa nodes for passthrough mode
      t/io_uring: enable support for registered buffers for passthrough

Ville Skyttä (3):
      init: include 5 in --terse-version help
      HOWTO: spelling fixes
      doc: fix --showcmd usage

 HOWTO.rst           | 8 ++++----
 fio.1               | 4 ++--
 init.c              | 2 +-
 os/linux/io_uring.h | 8 ++++++++
 t/io_uring.c        | 9 ++++++++-
 5 files changed, 23 insertions(+), 8 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 08be687c..2c6c6dbe 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -167,9 +167,9 @@ Command line options
 	defined by `ioengine`.  If no `ioengine` is given, list all
 	available ioengines.
 
-.. option:: --showcmd=jobfile
+.. option:: --showcmd
 
-	Convert `jobfile` to a set of command-line options.
+	Convert given job files to a set of command-line options.
 
 .. option:: --readonly
 
@@ -2550,7 +2550,7 @@ with the caveat that when used on the command line, they must come after the
 
    [dfs]
 
-	Specificy a different chunk size (in bytes) for the dfs file.
+	Specify a different chunk size (in bytes) for the dfs file.
 	Use DAOS container's chunk size by default.
 
    [libhdfs]
@@ -2559,7 +2559,7 @@ with the caveat that when used on the command line, they must come after the
 
 .. option:: object_class=str : [dfs]
 
-	Specificy a different object class for the dfs file.
+	Specify a different object class for the dfs file.
 	Use DAOS container's object class by default.
 
 .. option:: skip_bad=bool : [mtd]
diff --git a/fio.1 b/fio.1
index 27454b0b..67d7c710 100644
--- a/fio.1
+++ b/fio.1
@@ -67,8 +67,8 @@ List all commands defined by \fIioengine\fR, or print help for \fIcommand\fR
 defined by \fIioengine\fR. If no \fIioengine\fR is given, list all
 available ioengines.
 .TP
-.BI \-\-showcmd \fR=\fPjobfile
-Convert \fIjobfile\fR to a set of command\-line options.
+.BI \-\-showcmd
+Convert given \fIjobfile\fRs to a set of command\-line options.
 .TP
 .BI \-\-readonly
 Turn on safety read\-only checks, preventing writes and trims. The \fB\-\-readonly\fR
diff --git a/init.c b/init.c
index da800776..f6a8056a 100644
--- a/init.c
+++ b/init.c
@@ -2269,7 +2269,7 @@ static void usage(const char *name)
 	printf("  --minimal\t\tMinimal (terse) output\n");
 	printf("  --output-format=type\tOutput format (terse,json,json+,normal)\n");
 	printf("  --terse-version=type\tSet terse version output format"
-		" (default 3, or 2 or 4)\n");
+		" (default 3, or 2 or 4 or 5)\n");
 	printf("  --version\t\tPrint version info and exit\n");
 	printf("  --help\t\tPrint this page\n");
 	printf("  --cpuclock-test\tPerform test/validation of CPU clock\n");
diff --git a/os/linux/io_uring.h b/os/linux/io_uring.h
index 6604e736..c7a24ad8 100644
--- a/os/linux/io_uring.h
+++ b/os/linux/io_uring.h
@@ -46,6 +46,7 @@ struct io_uring_sqe {
 		__u32		rename_flags;
 		__u32		unlink_flags;
 		__u32		hardlink_flags;
+		__u32		uring_cmd_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	/* pack this to avoid bogus arm OABI complaints */
@@ -197,6 +198,13 @@ enum {
 	IORING_OP_LAST,
 };
 
+/*
+ * sqe->uring_cmd_flags
+ * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
+ *				along with setting sqe->buf_index.
+ */
+#define IORING_URING_CMD_FIXED	(1U << 0)
+
 /*
  * sqe->fsync_flags
  */
diff --git a/t/io_uring.c b/t/io_uring.c
index 9d580b5a..b9353ac8 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -650,6 +650,10 @@ static void init_io_pt(struct submitter *s, unsigned index)
 	cmd->cdw12 = nlb;
 	cmd->addr = (unsigned long) s->iovecs[index].iov_base;
 	cmd->data_len = bs;
+	if (fixedbufs) {
+		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
+		sqe->buf_index = index;
+	}
 	cmd->nsid = f->nsid;
 	cmd->opcode = 2;
 }
@@ -856,7 +860,10 @@ static int detect_node(struct submitter *s, const char *name)
 	char str[128];
 	int ret, fd, node;
 
-	sprintf(str, "/sys/block/%s/device/numa_node", base);
+	if (pt)
+		sprintf(str, "/sys/class/nvme-generic/%s/device/numa_node", base);
+	else
+		sprintf(str, "/sys/block/%s/device/numa_node", base);
 	fd = open(str, O_RDONLY);
 	if (fd < 0)
 		return -1;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-03 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-03 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit e57758c12bdb24885e32ba143a04fcc8f98565ca:

  Merge branch 'fiopr_compressfixes' of https://github.com/PCPartPicker/fio (2022-09-01 12:03:23 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 0b2c736174402afc742a7ed97c37f872fa93ee25:

  Merge branch 'fiopr_windows_log_compression_storage_fixes' of https://github.com/PCPartPicker/fio (2022-09-02 17:29:45 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'fiopr_windows_log_compression_storage_fixes' of https://github.com/PCPartPicker/fio

aggieNick02 (1):
      Fix log compression storage on windows

 iolog.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/iolog.c b/iolog.c
index 41d3e473..aa9c3bb1 100644
--- a/iolog.c
+++ b/iolog.c
@@ -1218,7 +1218,7 @@ int iolog_file_inflate(const char *file)
 	void *buf;
 	FILE *f;
 
-	f = fopen(file, "r");
+	f = fopen(file, "rb");
 	if (!f) {
 		perror("fopen");
 		return 1;
@@ -1300,10 +1300,21 @@ void flush_log(struct io_log *log, bool do_append)
 	void *buf;
 	FILE *f;
 
+	/*
+	 * If log_gz_store is true, we are writing a binary file.
+	 * Set the mode appropriately (on all platforms) to avoid issues
+	 * on windows (line-ending conversions, etc.)
+	 */
 	if (!do_append)
-		f = fopen(log->filename, "w");
+		if (log->log_gz_store)
+			f = fopen(log->filename, "wb");
+		else
+			f = fopen(log->filename, "w");
 	else
-		f = fopen(log->filename, "a");
+		if (log->log_gz_store)
+			f = fopen(log->filename, "ab");
+		else
+			f = fopen(log->filename, "a");
 	if (!f) {
 		perror("fopen log");
 		return;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-02 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-02 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 2be18f6b266f3fcba89719b354672090f49d53d9:

  t/io_uring: take advantage of new io_uring setup flags (2022-08-31 18:44:52 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to e57758c12bdb24885e32ba143a04fcc8f98565ca:

  Merge branch 'fiopr_compressfixes' of https://github.com/PCPartPicker/fio (2022-09-01 12:03:23 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      t/io_uring: minor optimizations to IO init fast path
      Merge branch 'fiopr_compressfixes' of https://github.com/PCPartPicker/fio

aggieNick02 (1):
      Fix fio silently dropping log entries when using log_compression

 iolog.c              |   6 +--
 t/io_uring.c         |  10 +++--
 t/log_compression.py | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++
 t/run-fio-tests.py   |   8 ++++
 4 files changed, 139 insertions(+), 6 deletions(-)
 create mode 100755 t/log_compression.py

---

Diff of recent changes:

diff --git a/iolog.c b/iolog.c
index 37e799a1..41d3e473 100644
--- a/iolog.c
+++ b/iolog.c
@@ -1574,14 +1574,14 @@ void iolog_compress_exit(struct thread_data *td)
  * Queue work item to compress the existing log entries. We reset the
  * current log to a small size, and reference the existing log in the
  * data that we queue for compression. Once compression has been done,
- * this old log is freed. If called with finish == true, will not return
- * until the log compression has completed, and will flush all previous
- * logs too
+ * this old log is freed. Will not return until the log compression
+ * has completed, and will flush all previous logs too
  */
 static int iolog_flush(struct io_log *log)
 {
 	struct iolog_flush_data *data;
 
+	workqueue_flush(&log->td->log_compress_wq);
 	data = malloc(sizeof(*data));
 	if (!data)
 		return 1;
diff --git a/t/io_uring.c b/t/io_uring.c
index 5b46015a..9d580b5a 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -658,11 +658,12 @@ static int prep_more_ios_uring(struct submitter *s, int max_ios)
 {
 	struct io_sq_ring *ring = &s->sq_ring;
 	unsigned index, tail, next_tail, prepped = 0;
+	unsigned int head = atomic_load_acquire(ring->head);
 
 	next_tail = tail = *ring->tail;
 	do {
 		next_tail++;
-		if (next_tail == atomic_load_acquire(ring->head))
+		if (next_tail == head)
 			break;
 
 		index = tail & sq_ring_mask;
@@ -670,7 +671,6 @@ static int prep_more_ios_uring(struct submitter *s, int max_ios)
 			init_io_pt(s, index);
 		else
 			init_io(s, index);
-		ring->array[index] = index;
 		prepped++;
 		tail = next_tail;
 	} while (prepped < max_ios);
@@ -908,7 +908,7 @@ static int setup_ring(struct submitter *s)
 	struct io_sq_ring *sring = &s->sq_ring;
 	struct io_cq_ring *cring = &s->cq_ring;
 	struct io_uring_params p;
-	int ret, fd;
+	int ret, fd, i;
 	void *ptr;
 	size_t len;
 
@@ -1003,6 +1003,10 @@ static int setup_ring(struct submitter *s)
 	cring->ring_entries = ptr + p.cq_off.ring_entries;
 	cring->cqes = ptr + p.cq_off.cqes;
 	cq_ring_mask = *cring->ring_mask;
+
+	for (i = 0; i < p.sq_entries; i++)
+		sring->array[i] = i;
+
 	return 0;
 }
 
diff --git a/t/log_compression.py b/t/log_compression.py
new file mode 100755
index 00000000..94c92db7
--- /dev/null
+++ b/t/log_compression.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+#
+# log_compression.py
+#
+# Test log_compression and log_store_compressed. Uses null ioengine.
+# Previous bugs have caused output in per I/O log files to be missing
+# and/or out of order
+#
+# Expected result: 8000 log entries, offset starting at 0 and increasing by bs
+# Buggy result: Log entries out of order (usually without log_store_compressed)
+# and/or missing log entries (usually with log_store_compressed)
+#
+# USAGE
+# python log_compression.py [-f fio-executable]
+#
+# EXAMPLES
+# python t/log_compression.py
+# python t/log_compression.py -f ./fio
+#
+# REQUIREMENTS
+# Python 3.5+
+#
+# ===TEST MATRIX===
+#
+# With log_compression=10K
+# With log_store_compressed=1 and log_compression=10K
+
+import os
+import sys
+import platform
+import argparse
+import subprocess
+
+
+def parse_args():
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--fio',
+                        help='path to fio executable (e.g., ./fio)')
+    return parser.parse_args()
+
+
+def run_fio(fio,log_store_compressed):
+    fio_args = [
+        '--name=job',
+        '--ioengine=null',
+        '--filesize=1000M',
+        '--bs=128K',
+        '--rw=write',
+        '--iodepth=1',
+        '--write_bw_log=test',
+        '--per_job_logs=0',
+        '--log_offset=1',
+        '--log_compression=10K',
+        ]
+    if log_store_compressed:
+        fio_args.append('--log_store_compressed=1')
+
+    subprocess.check_output([fio] + fio_args)
+
+    if log_store_compressed:
+        fio_inflate_args = [
+            '--inflate-log=test_bw.log.fz'
+            ]
+        with open('test_bw.from_fz.log','wt') as f:
+            subprocess.check_call([fio]+fio_inflate_args,stdout=f)
+
+def check_log_file(log_store_compressed):
+    filename = 'test_bw.from_fz.log' if log_store_compressed else 'test_bw.log'
+    with open(filename,'rt') as f:
+        file_data = f.read()
+    log_lines = [x for x in file_data.split('\n') if len(x.strip())!=0]
+    log_ios = len(log_lines)
+
+    filesize = 1000*1024*1024
+    bs = 128*1024
+    ios = filesize//bs
+    if log_ios!=ios:
+        print('wrong number of ios ({}) in log; should be {}'.format(log_ios,ios))
+        return False
+
+    expected_offset = 0
+    for line_number,line in enumerate(log_lines):
+        log_offset = int(line.split(',')[4])
+        if log_offset != expected_offset:
+            print('wrong offset ({}) for io number {} in log; should be {}'.format(
+                log_offset, line_number, expected_offset))
+            return False
+        expected_offset += bs
+    return True
+
+def main():
+    """Entry point for this script."""
+    args = parse_args()
+    if args.fio:
+        fio_path = args.fio
+    else:
+        fio_path = os.path.join(os.path.dirname(__file__), '../fio')
+        if not os.path.exists(fio_path):
+            fio_path = 'fio'
+    print("fio path is", fio_path)
+
+    passed_count = 0
+    failed_count = 0
+    for log_store_compressed in [False, True]:
+        run_fio(fio_path, log_store_compressed)
+        passed = check_log_file(log_store_compressed)
+        print('Test with log_store_compressed={} {}'.format(log_store_compressed,
+            'PASSED' if passed else 'FAILED'))
+        if passed:
+            passed_count+=1
+        else:
+            failed_count+=1
+
+    print('{} tests passed, {} failed'.format(passed_count, failed_count))
+
+    sys.exit(failed_count)
+
+if __name__ == '__main__':
+    main()
+
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 47823761..e72fa2a0 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -1124,6 +1124,14 @@ TEST_LIST = [
         'success':          SUCCESS_DEFAULT,
         'requirements':     [],
     },
+    {
+        'test_id':          1012,
+        'test_class':       FioExeTest,
+        'exe':              't/log_compression.py',
+        'parameters':       ['-f', '{fio_path}'],
+        'success':          SUCCESS_DEFAULT,
+        'requirements':     [],
+    },
 ]
 
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-09-01 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-09-01 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c9be6f0007ab79e3f83952c650af8e7a0c324953:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-08-30 18:19:30 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 2be18f6b266f3fcba89719b354672090f49d53d9:

  t/io_uring: take advantage of new io_uring setup flags (2022-08-31 18:44:52 -0600)

----------------------------------------------------------------
Jens Axboe (4):
      engines/io_uring: set COOP_TASKRUN for ring setup
      engines/io_uring: set single issuer and defer taskrun
      t/io_uring: unify getting of the offset
      t/io_uring: take advantage of new io_uring setup flags

 engines/io_uring.c  | 21 +++++++++++++++
 os/linux/io_uring.h | 12 +++++++++
 t/io_uring.c        | 75 ++++++++++++++++++++++++++++++++---------------------
 3 files changed, 78 insertions(+), 30 deletions(-)

---

Diff of recent changes:

diff --git a/engines/io_uring.c b/engines/io_uring.c
index 94376efa..d0fc61dc 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -809,9 +809,30 @@ static int fio_ioring_queue_init(struct thread_data *td)
 	p.flags |= IORING_SETUP_CQSIZE;
 	p.cq_entries = depth;
 
+	/*
+	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
+	 * completing IO operations.
+	 */
+	p.flags |= IORING_SETUP_COOP_TASKRUN;
+
+	/*
+	 * io_uring is always a single issuer, and we can defer task_work
+	 * runs until we reap events.
+	 */
+	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+
 retry:
 	ret = syscall(__NR_io_uring_setup, depth, &p);
 	if (ret < 0) {
+		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
+			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
+			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
+			goto retry;
+		}
+		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
+			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
+			goto retry;
+		}
 		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
 			p.flags &= ~IORING_SETUP_CQSIZE;
 			goto retry;
diff --git a/os/linux/io_uring.h b/os/linux/io_uring.h
index 929997f8..6604e736 100644
--- a/os/linux/io_uring.h
+++ b/os/linux/io_uring.h
@@ -131,6 +131,18 @@ enum {
 #define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
 #define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */
 
+/*
+ * Only one task is allowed to submit requests
+ */
+#define IORING_SETUP_SINGLE_ISSUER	(1U << 12)
+
+/*
+ * Defer running task work to get events.
+ * Rather than running bits of task work whenever the task transitions
+ * try to do it just before it is needed.
+ */
+#define IORING_SETUP_DEFER_TASKRUN	(1U << 13)
+
 enum {
 	IORING_OP_NOP,
 	IORING_OP_READV,
diff --git a/t/io_uring.c b/t/io_uring.c
index e8e41796..5b46015a 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -449,6 +449,8 @@ static int io_uring_register_files(struct submitter *s)
 
 static int io_uring_setup(unsigned entries, struct io_uring_params *p)
 {
+	int ret;
+
 	/*
 	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
 	 * than that.
@@ -456,7 +458,28 @@ static int io_uring_setup(unsigned entries, struct io_uring_params *p)
 	p->flags |= IORING_SETUP_CQSIZE;
 	p->cq_entries = entries;
 
-	return syscall(__NR_io_uring_setup, entries, p);
+	p->flags |= IORING_SETUP_COOP_TASKRUN;
+	p->flags |= IORING_SETUP_SINGLE_ISSUER;
+	p->flags |= IORING_SETUP_DEFER_TASKRUN;
+retry:
+	ret = syscall(__NR_io_uring_setup, entries, p);
+	if (!ret)
+		return 0;
+
+	if (errno == EINVAL && p->flags & IORING_SETUP_COOP_TASKRUN) {
+		p->flags &= ~IORING_SETUP_COOP_TASKRUN;
+		goto retry;
+	}
+	if (errno == EINVAL && p->flags & IORING_SETUP_SINGLE_ISSUER) {
+		p->flags &= ~IORING_SETUP_SINGLE_ISSUER;
+		goto retry;
+	}
+	if (errno == EINVAL && p->flags & IORING_SETUP_DEFER_TASKRUN) {
+		p->flags &= ~IORING_SETUP_DEFER_TASKRUN;
+		goto retry;
+	}
+
+	return ret;
 }
 
 static void io_uring_probe(int fd)
@@ -501,12 +524,28 @@ static unsigned file_depth(struct submitter *s)
 	return (depth + s->nr_files - 1) / s->nr_files;
 }
 
+static unsigned long long get_offset(struct submitter *s, struct file *f)
+{
+	unsigned long long offset;
+	long r;
+
+	if (random_io) {
+		r = __rand64(&s->rand_state);
+		offset = (r % (f->max_blocks - 1)) * bs;
+	} else {
+		offset = f->cur_off;
+		f->cur_off += bs;
+		if (f->cur_off + bs > f->max_size)
+			f->cur_off = 0;
+	}
+
+	return offset;
+}
+
 static void init_io(struct submitter *s, unsigned index)
 {
 	struct io_uring_sqe *sqe = &s->sqes[index];
-	unsigned long offset;
 	struct file *f;
-	long r;
 
 	if (do_nop) {
 		sqe->opcode = IORING_OP_NOP;
@@ -526,16 +565,6 @@ static void init_io(struct submitter *s, unsigned index)
 	}
 	f->pending_ios++;
 
-	if (random_io) {
-		r = __rand64(&s->rand_state);
-		offset = (r % (f->max_blocks - 1)) * bs;
-	} else {
-		offset = f->cur_off;
-		f->cur_off += bs;
-		if (f->cur_off + bs > f->max_size)
-			f->cur_off = 0;
-	}
-
 	if (register_files) {
 		sqe->flags = IOSQE_FIXED_FILE;
 		sqe->fd = f->fixed_fd;
@@ -560,7 +589,7 @@ static void init_io(struct submitter *s, unsigned index)
 		sqe->buf_index = 0;
 	}
 	sqe->ioprio = 0;
-	sqe->off = offset;
+	sqe->off = get_offset(s, f);
 	sqe->user_data = (unsigned long) f->fileno;
 	if (stats && stats_running)
 		sqe->user_data |= ((uint64_t)s->clock_index << 32);
@@ -1072,10 +1101,8 @@ static int submitter_init(struct submitter *s)
 static int prep_more_ios_aio(struct submitter *s, int max_ios, struct iocb *iocbs)
 {
 	uint64_t data;
-	long long offset;
 	struct file *f;
 	unsigned index;
-	long r;
 
 	index = 0;
 	while (index < max_ios) {
@@ -1094,10 +1121,8 @@ static int prep_more_ios_aio(struct submitter *s, int max_ios, struct iocb *iocb
 		}
 		f->pending_ios++;
 
-		r = lrand48();
-		offset = (r % (f->max_blocks - 1)) * bs;
 		io_prep_pread(iocb, f->real_fd, s->iovecs[index].iov_base,
-				s->iovecs[index].iov_len, offset);
+				s->iovecs[index].iov_len, get_offset(s, f));
 
 		data = f->fileno;
 		if (stats && stats_running)
@@ -1380,7 +1405,6 @@ static void *submitter_sync_fn(void *data)
 	do {
 		uint64_t offset;
 		struct file *f;
-		long r;
 
 		if (s->nr_files == 1) {
 			f = &s->files[0];
@@ -1395,16 +1419,6 @@ static void *submitter_sync_fn(void *data)
 		}
 		f->pending_ios++;
 
-		if (random_io) {
-			r = __rand64(&s->rand_state);
-			offset = (r % (f->max_blocks - 1)) * bs;
-		} else {
-			offset = f->cur_off;
-			f->cur_off += bs;
-			if (f->cur_off + bs > f->max_size)
-				f->cur_off = 0;
-		}
-
 #ifdef ARCH_HAVE_CPU_CLOCK
 		if (stats)
 			s->clock_batch[s->clock_index] = get_cpu_clock();
@@ -1413,6 +1427,7 @@ static void *submitter_sync_fn(void *data)
 		s->inflight++;
 		s->calls++;
 
+		offset = get_offset(s, f);
 		if (polled)
 			ret = preadv2(f->real_fd, &s->iovecs[0], 1, offset, RWF_HIPRI);
 		else

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-31 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-31 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit b68ba328173f5a4714d888f6ce80fd24a4e4c504:

  test: get 32-bit Ubuntu 22.04 build working (2022-08-29 16:42:18 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c9be6f0007ab79e3f83952c650af8e7a0c324953:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-08-30 18:19:30 -0600)

----------------------------------------------------------------
Bart Van Assche (4):
      Remove two casts from os-linux.h
      Linux: Use the byte order functions from <asm/byteorder.h>
      Split os-android.h
      Merge os-android.h into os-linux.h

Jens Axboe (3):
      backend: revert bad memory leak fix
      Fio 3.32
      Merge branch 'master' of https://github.com/bvanassche/fio

Vincent Fu (1):
      test: add tests for lfsr and norandommap

 FIO-VERSION-GEN    |   2 +-
 backend.c          |   5 -
 os/os-android.h    | 342 -----------------------------------------------------
 os/os-ashmem.h     |  84 +++++++++++++
 os/os-linux.h      |  14 ++-
 os/os.h            |   4 +-
 t/jobs/t0021.fio   |  15 +++
 t/jobs/t0022.fio   |  13 ++
 t/run-fio-tests.py |  55 ++++++++-
 9 files changed, 180 insertions(+), 354 deletions(-)
 delete mode 100644 os/os-android.h
 create mode 100644 os/os-ashmem.h
 create mode 100644 t/jobs/t0021.fio
 create mode 100644 t/jobs/t0022.fio

---

Diff of recent changes:

diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index 72630dd0..db073818 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.31
+DEF_VER=fio-3.32
 
 LF='
 '
diff --git a/backend.c b/backend.c
index 375a23e4..fe614f6e 100644
--- a/backend.c
+++ b/backend.c
@@ -2451,10 +2451,8 @@ reap:
 							strerror(ret));
 			} else {
 				pid_t pid;
-				struct fio_file **files;
 				void *eo;
 				dprint(FD_PROCESS, "will fork\n");
-				files = td->files;
 				eo = td->eo;
 				read_barrier();
 				pid = fork();
@@ -2465,9 +2463,6 @@ reap:
 					_exit(ret);
 				} else if (i == fio_debug_jobno)
 					*fio_debug_jobp = pid;
-				// freeing previously allocated memory for files
-				// this memory freed MUST NOT be shared between processes, only the pointer itself may be shared within TD
-				free(files);
 				free(eo);
 				free(fd);
 				fd = NULL;
diff --git a/os/os-android.h b/os/os-android.h
deleted file mode 100644
index 34534239..00000000
--- a/os/os-android.h
+++ /dev/null
@@ -1,342 +0,0 @@
-#ifndef FIO_OS_ANDROID_H
-#define FIO_OS_ANDROID_H
-
-#define	FIO_OS	os_android
-
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/uio.h>
-#include <sys/syscall.h>
-#include <sys/sysmacros.h>
-#include <sys/vfs.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sched.h>
-#include <linux/unistd.h>
-#include <linux/major.h>
-#include <asm/byteorder.h>
-
-#include "./os-linux-syscall.h"
-#include "../file.h"
-
-#ifndef __has_builtin         // Optional of course.
-  #define __has_builtin(x) 0  // Compatibility with non-clang compilers.
-#endif
-
-#define FIO_HAVE_CPU_AFFINITY
-#define FIO_HAVE_DISK_UTIL
-#define FIO_HAVE_IOSCHED_SWITCH
-#define FIO_HAVE_IOPRIO
-#define FIO_HAVE_IOPRIO_CLASS
-#define FIO_HAVE_ODIRECT
-#define FIO_HAVE_HUGETLB
-#define FIO_HAVE_BLKTRACE
-#define FIO_HAVE_CL_SIZE
-#define FIO_HAVE_CGROUPS
-#define FIO_HAVE_FS_STAT
-#define FIO_HAVE_TRIM
-#define FIO_HAVE_GETTID
-#define FIO_USE_GENERIC_INIT_RANDOM_STATE
-#define FIO_HAVE_E4_ENG
-#define FIO_HAVE_BYTEORDER_FUNCS
-#define FIO_HAVE_MMAP_HUGE
-#define FIO_NO_HAVE_SHM_H
-
-#define OS_MAP_ANON		MAP_ANONYMOUS
-
-typedef cpu_set_t os_cpu_mask_t;
-
-#define fio_setaffinity(pid, cpumask)		\
-	sched_setaffinity((pid), sizeof(cpumask), &(cpumask))
-#define fio_getaffinity(pid, ptr)	\
-	sched_getaffinity((pid), sizeof(cpu_set_t), (ptr))
-
-#ifndef POSIX_MADV_DONTNEED
-#define posix_madvise   madvise
-#define POSIX_MADV_DONTNEED MADV_DONTNEED
-#define POSIX_MADV_SEQUENTIAL	MADV_SEQUENTIAL
-#define POSIX_MADV_RANDOM	MADV_RANDOM
-#endif
-
-#ifdef MADV_REMOVE
-#define FIO_MADV_FREE	MADV_REMOVE
-#endif
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000 /* arch specific */
-#endif
-
-#ifdef CONFIG_PTHREAD_GETAFFINITY
-#define FIO_HAVE_GET_THREAD_AFFINITY
-#define fio_get_thread_affinity(mask)	\
-	pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
-#endif
-
-#define fio_cpu_clear(mask, cpu)	CPU_CLR((cpu), (mask))
-#define fio_cpu_set(mask, cpu)		CPU_SET((cpu), (mask))
-#define fio_cpu_isset(mask, cpu)	(CPU_ISSET((cpu), (mask)) != 0)
-#define fio_cpu_count(mask)		CPU_COUNT((mask))
-
-static inline int fio_cpuset_init(os_cpu_mask_t *mask)
-{
-	CPU_ZERO(mask);
-	return 0;
-}
-
-static inline int fio_cpuset_exit(os_cpu_mask_t *mask)
-{
-	return 0;
-}
-
-#define FIO_MAX_CPUS			CPU_SETSIZE
-
-#ifndef CONFIG_NO_SHM
-/*
- * Bionic doesn't support SysV shared memory, so implement it using ashmem
- */
-#include <stdio.h>
-#include <linux/ashmem.h>
-#include <linux/shm.h>
-#include <android/api-level.h>
-#if __ANDROID_API__ >= __ANDROID_API_O__
-#include <android/sharedmem.h>
-#else
-#define ASHMEM_DEVICE	"/dev/ashmem"
-#endif
-#define shmid_ds shmid64_ds
-#define SHM_HUGETLB    04000
-
-static inline int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf)
-{
-	int ret=0;
-	if (__cmd == IPC_RMID)
-	{
-		int length = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
-		struct ashmem_pin pin = {0 , length};
-		ret = ioctl(__shmid, ASHMEM_UNPIN, &pin);
-		close(__shmid);
-	}
-	return ret;
-}
-
-#if __ANDROID_API__ >= __ANDROID_API_O__
-static inline int shmget(key_t __key, size_t __size, int __shmflg)
-{
-	char keybuf[11];
-
-	sprintf(keybuf, "%d", __key);
-
-	return ASharedMemory_create(keybuf, __size + sizeof(uint64_t));
-}
-#else
-static inline int shmget(key_t __key, size_t __size, int __shmflg)
-{
-	int fd,ret;
-	char keybuf[11];
-
-	fd = open(ASHMEM_DEVICE, O_RDWR);
-	if (fd < 0)
-		return fd;
-
-	sprintf(keybuf,"%d",__key);
-	ret = ioctl(fd, ASHMEM_SET_NAME, keybuf);
-	if (ret < 0)
-		goto error;
-
-	/* Stores size in first 8 bytes, allocate extra space */
-	ret = ioctl(fd, ASHMEM_SET_SIZE, __size + sizeof(uint64_t));
-	if (ret < 0)
-		goto error;
-
-	return fd;
-
-error:
-	close(fd);
-	return ret;
-}
-#endif
-
-static inline void *shmat(int __shmid, const void *__shmaddr, int __shmflg)
-{
-	size_t size = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
-	/* Needs to be 8-byte aligned to prevent SIGBUS on 32-bit ARM */
-	uint64_t *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, __shmid, 0);
-	/* Save size at beginning of buffer, for use with munmap */
-	*ptr = size;
-	return ptr + 1;
-}
-
-static inline int shmdt (const void *__shmaddr)
-{
-	/* Find mmap size which we stored at the beginning of the buffer */
-	uint64_t *ptr = (uint64_t *)__shmaddr - 1;
-	size_t size = *ptr;
-	return munmap(ptr, size);
-}
-#endif
-
-#define SPLICE_DEF_SIZE	(64*1024)
-
-enum {
-	IOPRIO_CLASS_NONE,
-	IOPRIO_CLASS_RT,
-	IOPRIO_CLASS_BE,
-	IOPRIO_CLASS_IDLE,
-};
-
-enum {
-	IOPRIO_WHO_PROCESS = 1,
-	IOPRIO_WHO_PGRP,
-	IOPRIO_WHO_USER,
-};
-
-#define IOPRIO_BITS		16
-#define IOPRIO_CLASS_SHIFT	13
-
-#define IOPRIO_MIN_PRIO		0	/* highest priority */
-#define IOPRIO_MAX_PRIO		7	/* lowest priority */
-
-#define IOPRIO_MIN_PRIO_CLASS	0
-#define IOPRIO_MAX_PRIO_CLASS	3
-
-static inline int ioprio_value(int ioprio_class, int ioprio)
-{
-	/*
-	 * If no class is set, assume BE
-	 */
-        if (!ioprio_class)
-                ioprio_class = IOPRIO_CLASS_BE;
-
-	return (ioprio_class << IOPRIO_CLASS_SHIFT) | ioprio;
-}
-
-static inline bool ioprio_value_is_class_rt(unsigned int priority)
-{
-	return (priority >> IOPRIO_CLASS_SHIFT) == IOPRIO_CLASS_RT;
-}
-
-static inline int ioprio_set(int which, int who, int ioprio_class, int ioprio)
-{
-	return syscall(__NR_ioprio_set, which, who,
-		       ioprio_value(ioprio_class, ioprio));
-}
-
-#ifndef BLKGETSIZE64
-#define BLKGETSIZE64	_IOR(0x12,114,size_t)
-#endif
-
-#ifndef BLKFLSBUF
-#define BLKFLSBUF	_IO(0x12,97)
-#endif
-
-#ifndef BLKDISCARD
-#define BLKDISCARD	_IO(0x12,119)
-#endif
-
-static inline int blockdev_invalidate_cache(struct fio_file *f)
-{
-	return ioctl(f->fd, BLKFLSBUF);
-}
-
-static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
-{
-	if (!ioctl(f->fd, BLKGETSIZE64, bytes))
-		return 0;
-
-	return errno;
-}
-
-static inline unsigned long long os_phys_mem(void)
-{
-	long pagesize, pages;
-
-	pagesize = sysconf(_SC_PAGESIZE);
-	pages = sysconf(_SC_PHYS_PAGES);
-	if (pages == -1 || pagesize == -1)
-		return 0;
-
-	return (unsigned long long) pages * (unsigned long long) pagesize;
-}
-
-#ifdef O_NOATIME
-#define FIO_O_NOATIME	O_NOATIME
-#else
-#define FIO_O_NOATIME	0
-#endif
-
-/* Check for GCC or Clang byte swap intrinsics */
-#if (__has_builtin(__builtin_bswap16) && __has_builtin(__builtin_bswap32) \
-     && __has_builtin(__builtin_bswap64)) || (__GNUC__ > 4 \
-     || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) /* fio_swapN */
-#define fio_swap16(x)	__builtin_bswap16(x)
-#define fio_swap32(x)	__builtin_bswap32(x)
-#define fio_swap64(x)	__builtin_bswap64(x)
-#else
-#include <byteswap.h>
-#define fio_swap16(x)	bswap_16(x)
-#define fio_swap32(x)	bswap_32(x)
-#define fio_swap64(x)	bswap_64(x)
-#endif /* fio_swapN */
-
-#define CACHE_LINE_FILE	\
-	"/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"
-
-static inline int arch_cache_line_size(void)
-{
-	char size[32];
-	int fd, ret;
-
-	fd = open(CACHE_LINE_FILE, O_RDONLY);
-	if (fd < 0)
-		return -1;
-
-	ret = read(fd, size, sizeof(size));
-
-	close(fd);
-
-	if (ret <= 0)
-		return -1;
-	else
-		return atoi(size);
-}
-
-static inline unsigned long long get_fs_free_size(const char *path)
-{
-	unsigned long long ret;
-	struct statfs s;
-
-	if (statfs(path, &s) < 0)
-		return -1ULL;
-
-	ret = s.f_bsize;
-	ret *= (unsigned long long) s.f_bfree;
-	return ret;
-}
-
-static inline int os_trim(struct fio_file *f, unsigned long long start,
-			  unsigned long long len)
-{
-	uint64_t range[2];
-
-	range[0] = start;
-	range[1] = len;
-
-	if (!ioctl(f->fd, BLKDISCARD, range))
-		return 0;
-
-	return errno;
-}
-
-#ifdef CONFIG_SCHED_IDLE
-static inline int fio_set_sched_idle(void)
-{
-        struct sched_param p = { .sched_priority = 0, };
-        return sched_setscheduler(gettid(), SCHED_IDLE, &p);
-}
-#endif
-
-#ifndef RWF_UNCACHED
-#define RWF_UNCACHED	0x00000040
-#endif
-
-#endif
diff --git a/os/os-ashmem.h b/os/os-ashmem.h
new file mode 100644
index 00000000..c34ff656
--- /dev/null
+++ b/os/os-ashmem.h
@@ -0,0 +1,84 @@
+#ifndef CONFIG_NO_SHM
+/*
+ * Bionic doesn't support SysV shared memory, so implement it using ashmem
+ */
+#include <stdio.h>
+#include <linux/ashmem.h>
+#include <linux/shm.h>
+#include <android/api-level.h>
+#if __ANDROID_API__ >= __ANDROID_API_O__
+#include <android/sharedmem.h>
+#else
+#define ASHMEM_DEVICE	"/dev/ashmem"
+#endif
+#define shmid_ds shmid64_ds
+#define SHM_HUGETLB    04000
+
+static inline int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf)
+{
+	int ret=0;
+	if (__cmd == IPC_RMID)
+	{
+		int length = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
+		struct ashmem_pin pin = {0 , length};
+		ret = ioctl(__shmid, ASHMEM_UNPIN, &pin);
+		close(__shmid);
+	}
+	return ret;
+}
+
+#if __ANDROID_API__ >= __ANDROID_API_O__
+static inline int shmget(key_t __key, size_t __size, int __shmflg)
+{
+	char keybuf[11];
+
+	sprintf(keybuf, "%d", __key);
+
+	return ASharedMemory_create(keybuf, __size + sizeof(uint64_t));
+}
+#else
+static inline int shmget(key_t __key, size_t __size, int __shmflg)
+{
+	int fd,ret;
+	char keybuf[11];
+
+	fd = open(ASHMEM_DEVICE, O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	sprintf(keybuf,"%d",__key);
+	ret = ioctl(fd, ASHMEM_SET_NAME, keybuf);
+	if (ret < 0)
+		goto error;
+
+	/* Stores size in first 8 bytes, allocate extra space */
+	ret = ioctl(fd, ASHMEM_SET_SIZE, __size + sizeof(uint64_t));
+	if (ret < 0)
+		goto error;
+
+	return fd;
+
+error:
+	close(fd);
+	return ret;
+}
+#endif
+
+static inline void *shmat(int __shmid, const void *__shmaddr, int __shmflg)
+{
+	size_t size = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
+	/* Needs to be 8-byte aligned to prevent SIGBUS on 32-bit ARM */
+	uint64_t *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, __shmid, 0);
+	/* Save size at beginning of buffer, for use with munmap */
+	*ptr = size;
+	return ptr + 1;
+}
+
+static inline int shmdt (const void *__shmaddr)
+{
+	/* Find mmap size which we stored at the beginning of the buffer */
+	uint64_t *ptr = (uint64_t *)__shmaddr - 1;
+	size_t size = *ptr;
+	return munmap(ptr, size);
+}
+#endif
diff --git a/os/os-linux.h b/os/os-linux.h
index 3001140c..831f0ad0 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -1,7 +1,11 @@
 #ifndef FIO_OS_LINUX_H
 #define FIO_OS_LINUX_H
 
+#ifdef __ANDROID__
+#define FIO_OS  os_android
+#else
 #define	FIO_OS	os_linux
+#endif
 
 #include <sys/ioctl.h>
 #include <sys/uio.h>
@@ -17,6 +21,11 @@
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <scsi/sg.h>
+#include <asm/byteorder.h>
+#ifdef __ANDROID__
+#include "os-ashmem.h"
+#define FIO_NO_HAVE_SHM_H
+#endif
 
 #ifdef ARCH_HAVE_CRC_CRYPTO
 #include <sys/auxv.h>
@@ -50,6 +59,7 @@
 #define FIO_HAVE_TRIM
 #define FIO_HAVE_GETTID
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
+#define FIO_HAVE_BYTEORDER_FUNCS
 #define FIO_HAVE_PWRITEV2
 #define FIO_HAVE_SHM_ATTACH_REMOVED
 
@@ -81,8 +91,8 @@ typedef cpu_set_t os_cpu_mask_t;
 	pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
 #endif
 
-#define fio_cpu_clear(mask, cpu)	(void) CPU_CLR((cpu), (mask))
-#define fio_cpu_set(mask, cpu)		(void) CPU_SET((cpu), (mask))
+#define fio_cpu_clear(mask, cpu)	CPU_CLR((cpu), (mask))
+#define fio_cpu_set(mask, cpu)		CPU_SET((cpu), (mask))
 #define fio_cpu_isset(mask, cpu)	(CPU_ISSET((cpu), (mask)) != 0)
 #define fio_cpu_count(mask)		CPU_COUNT((mask))
 
diff --git a/os/os.h b/os/os.h
index 810e6166..aba6813f 100644
--- a/os/os.h
+++ b/os/os.h
@@ -33,9 +33,7 @@ typedef enum {
 } cpu_features;
 
 /* IWYU pragma: begin_exports */
-#if defined(__ANDROID__)
-#include "os-android.h"
-#elif defined(__linux__)
+#if defined(__linux__)
 #include "os-linux.h"
 #elif defined(__FreeBSD__)
 #include "os-freebsd.h"
diff --git a/t/jobs/t0021.fio b/t/jobs/t0021.fio
new file mode 100644
index 00000000..47fbae71
--- /dev/null
+++ b/t/jobs/t0021.fio
@@ -0,0 +1,15 @@
+# make sure the lfsr random generator actually does touch all the offsets
+#
+# Expected result: offsets are not accessed sequentially and all offsets are touched
+# Buggy result: offsets are accessed sequentially and one or more offsets are missed
+# run with --debug=io or logging to see which offsets are read
+
+[test]
+ioengine=null
+filesize=1M
+rw=randread
+write_bw_log=test
+per_job_logs=0
+log_offset=1
+norandommap=1
+random_generator=lfsr
diff --git a/t/jobs/t0022.fio b/t/jobs/t0022.fio
new file mode 100644
index 00000000..2324571e
--- /dev/null
+++ b/t/jobs/t0022.fio
@@ -0,0 +1,13 @@
+# make sure that when we enable norandommap we touch some offsets more than once
+#
+# Expected result: at least one offset is touched more than once
+# Buggy result: each offset is touched only once
+
+[test]
+ioengine=null
+filesize=1M
+rw=randread
+write_bw_log=test
+per_job_logs=0
+log_offset=1
+norandommap=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 78f43521..47823761 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -576,7 +576,7 @@ class FioJobTest_t0019(FioJobTest):
 
 
 class FioJobTest_t0020(FioJobTest):
-    """Test consists of fio test job t0020
+    """Test consists of fio test jobs t0020 and t0021
     Confirm that almost all offsets were touched non-sequentially"""
 
     def check_result(self):
@@ -614,6 +614,41 @@ class FioJobTest_t0020(FioJobTest):
                 self.failure_reason += " missing offset {0}".format(i*4096)
 
 
+class FioJobTest_t0022(FioJobTest):
+    """Test consists of fio test job t0022"""
+
+    def check_result(self):
+        super(FioJobTest_t0022, self).check_result()
+
+        bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
+        file_data, success = self.get_file(bw_log_filename)
+        log_lines = file_data.split('\n')
+
+        filesize = 1024*1024
+        bs = 4096
+        seq_count = 0
+        offsets = set()
+
+        prev = int(log_lines[0].split(',')[4])
+        for line in log_lines[1:]:
+            offsets.add(prev/bs)
+            if len(line.strip()) == 0:
+                continue
+            cur = int(line.split(',')[4])
+            if cur - prev == bs:
+                seq_count += 1
+            prev = cur
+
+        # 10 is an arbitrary threshold
+        if seq_count > 10:
+            self.passed = False
+            self.failure_reason = "too many ({0}) consecutive offsets".format(seq_count)
+
+        if len(offsets) == filesize/bs:
+            self.passed = False
+            self.failure_reason += " no duplicate offsets found with norandommap=1".format(len(offsets))
+
+
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
     Confirm that job0 iops == 1000
@@ -973,6 +1008,24 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [],
     },
+    {
+        'test_id':          21,
+        'test_class':       FioJobTest_t0020,
+        'job':              't0021.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
+    {
+        'test_id':          22,
+        'test_class':       FioJobTest_t0022,
+        'job':              't0022.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-30 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-30 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit a5a2429ece9b2a7e35e2b8a0248e7b1de6d075c3:

  t/io_uring: remove duplicate definition of gettid() (2022-08-26 14:17:40 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to b68ba328173f5a4714d888f6ce80fd24a4e4c504:

  test: get 32-bit Ubuntu 22.04 build working (2022-08-29 16:42:18 -0400)

----------------------------------------------------------------
Vincent Fu (3):
      test: add some tests for seq and rand offsets
      test: use Ubuntu 22.04 for 64-bit tests
      test: get 32-bit Ubuntu 22.04 build working

 .github/workflows/ci.yml |  8 ++---
 ci/actions-install.sh    | 13 ++++----
 t/jobs/t0019.fio         | 10 ++++++
 t/jobs/t0020.fio         | 11 +++++++
 t/run-fio-tests.py       | 84 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 115 insertions(+), 11 deletions(-)
 create mode 100644 t/jobs/t0019.fio
 create mode 100644 t/jobs/t0020.fio

---

Diff of recent changes:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 650366b2..bdc4db85 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,18 +18,18 @@ jobs:
         - android
         include:
         - build: linux-gcc
-          os: ubuntu-20.04
+          os: ubuntu-22.04
           cc: gcc
         - build: linux-clang
-          os: ubuntu-20.04
+          os: ubuntu-22.04
           cc: clang
         - build: macos
           os: macos-11
         - build: linux-i686-gcc
-          os: ubuntu-20.04
+          os: ubuntu-22.04
           arch: i686
         - build: android
-          os: ubuntu-20.04
+          os: ubuntu-22.04
           arch: aarch64-linux-android32
 
     env:
diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index b5c4198f..c209a089 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -23,26 +23,21 @@ DPKGCFG
         libcunit1-dev
         libcurl4-openssl-dev
         libfl-dev
-        libibverbs-dev
         libnuma-dev
-        librdmacm-dev
 	libnfs-dev
         valgrind
     )
     case "${CI_TARGET_ARCH}" in
         "i686")
             sudo dpkg --add-architecture i386
-            opts="--allow-downgrades"
             pkgs=("${pkgs[@]/%/:i386}")
             pkgs+=(
                 gcc-multilib
                 pkg-config:i386
                 zlib1g-dev:i386
-		libpcre2-8-0=10.34-7
             )
             ;;
         "x86_64")
-            opts=""
             pkgs+=(
                 libglusterfs-dev
                 libgoogle-perftools-dev
@@ -53,7 +48,11 @@ DPKGCFG
                 librbd-dev
                 libtcmalloc-minimal4
                 nvidia-cuda-dev
+                libibverbs-dev
+                librdmacm-dev
             )
+	    echo "Removing libunwind-14-dev because of conflicts with libunwind-dev"
+	    sudo apt remove -y libunwind-14-dev
             ;;
     esac
 
@@ -66,8 +65,8 @@ DPKGCFG
 
     echo "Updating APT..."
     sudo apt-get -qq update
-    echo "Installing packages..."
-    sudo apt-get install "$opts" -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
+    echo "Installing packages... ${pkgs[@]}"
+    sudo apt-get install -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
 }
 
 install_linux() {
diff --git a/t/jobs/t0019.fio b/t/jobs/t0019.fio
new file mode 100644
index 00000000..b60d27d2
--- /dev/null
+++ b/t/jobs/t0019.fio
@@ -0,0 +1,10 @@
+# Expected result: offsets are accessed sequentially and all offsets are read
+# Buggy result: offsets are not accessed sequentially and one or more offsets are missed
+# run with --debug=io or logging to see which offsets are accessed
+
+[test]
+ioengine=null
+filesize=1M
+write_bw_log=test
+per_job_logs=0
+log_offset=1
diff --git a/t/jobs/t0020.fio b/t/jobs/t0020.fio
new file mode 100644
index 00000000..1c1c5166
--- /dev/null
+++ b/t/jobs/t0020.fio
@@ -0,0 +1,11 @@
+# Expected result: offsets are not accessed sequentially and all offsets are touched
+# Buggy result: offsets are accessed sequentially and one or more offsets are missed
+# run with --debug=io or logging to see which offsets are read
+
+[test]
+ioengine=null
+filesize=1M
+rw=randread
+write_bw_log=test
+per_job_logs=0
+log_offset=1
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 1e5e9f24..78f43521 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -548,6 +548,72 @@ class FioJobTest_t0015(FioJobTest):
             self.passed = False
 
 
+class FioJobTest_t0019(FioJobTest):
+    """Test consists of fio test job t0019
+    Confirm that all offsets were touched sequentially"""
+
+    def check_result(self):
+        super(FioJobTest_t0019, self).check_result()
+
+        bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
+        file_data, success = self.get_file(bw_log_filename)
+        log_lines = file_data.split('\n')
+
+        prev = -4096
+        for line in log_lines:
+            if len(line.strip()) == 0:
+                continue
+            cur = int(line.split(',')[4])
+            if cur - prev != 4096:
+                self.passed = False
+                self.failure_reason = "offsets {0}, {1} not sequential".format(prev, cur)
+                return
+            prev = cur
+
+        if cur/4096 != 255:
+            self.passed = False
+            self.failure_reason = "unexpected last offset {0}".format(cur)
+
+
+class FioJobTest_t0020(FioJobTest):
+    """Test consists of fio test job t0020
+    Confirm that almost all offsets were touched non-sequentially"""
+
+    def check_result(self):
+        super(FioJobTest_t0020, self).check_result()
+
+        bw_log_filename = os.path.join(self.test_dir, "test_bw.log")
+        file_data, success = self.get_file(bw_log_filename)
+        log_lines = file_data.split('\n')
+
+        seq_count = 0
+        offsets = set()
+
+        prev = int(log_lines[0].split(',')[4])
+        for line in log_lines[1:]:
+            offsets.add(prev/4096)
+            if len(line.strip()) == 0:
+                continue
+            cur = int(line.split(',')[4])
+            if cur - prev == 4096:
+                seq_count += 1
+            prev = cur
+
+        # 10 is an arbitrary threshold
+        if seq_count > 10:
+            self.passed = False
+            self.failure_reason = "too many ({0}) consecutive offsets".format(seq_count)
+
+        if len(offsets) != 256:
+            self.passed = False
+            self.failure_reason += " number of offsets is {0} instead of 256".format(len(offsets))
+
+        for i in range(256):
+            if not i in offsets:
+                self.passed = False
+                self.failure_reason += " missing offset {0}".format(i*4096)
+
+
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
     Confirm that job0 iops == 1000
@@ -889,6 +955,24 @@ TEST_LIST = [
         'pre_success':      None,
         'requirements':     [Requirements.linux, Requirements.io_uring],
     },
+    {
+        'test_id':          19,
+        'test_class':       FioJobTest_t0019,
+        'job':              't0019.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
+    {
+        'test_id':          20,
+        'test_class':       FioJobTest_t0020,
+        'job':              't0020.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-27 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-27 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c27ae7ae6c3d9108bba80ff71cf36bf7fc8b34c9:

  engines/io_uring: delete debug code (2022-08-25 11:19:34 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a5a2429ece9b2a7e35e2b8a0248e7b1de6d075c3:

  t/io_uring: remove duplicate definition of gettid() (2022-08-26 14:17:40 -0600)

----------------------------------------------------------------
Anuj Gupta (2):
      t/io_uring: prep for including engines/nvme.h in t/io_uring
      t/io_uring: add support for async-passthru

Jens Axboe (2):
      t/io_uring: fix 64-bit cast on 32-bit archs
      t/io_uring: remove duplicate definition of gettid()

Vincent Fu (1):
      test: add basic test for io_uring ioengine

 t/io_uring.c       | 264 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 t/jobs/t0018.fio   |   9 ++
 t/run-fio-tests.py |  22 +++++
 3 files changed, 271 insertions(+), 24 deletions(-)
 create mode 100644 t/jobs/t0018.fio

---

Diff of recent changes:

diff --git a/t/io_uring.c b/t/io_uring.c
index f34a3554..e8e41796 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -30,11 +30,13 @@
 #include <sched.h>
 
 #include "../arch/arch.h"
+#include "../os/os.h"
 #include "../lib/types.h"
 #include "../lib/roundup.h"
 #include "../lib/rand.h"
 #include "../minmax.h"
 #include "../os/linux/io_uring.h"
+#include "../engines/nvme.h"
 
 struct io_sq_ring {
 	unsigned *head;
@@ -67,6 +69,8 @@ struct file {
 	unsigned long max_size;
 	unsigned long cur_off;
 	unsigned pending_ios;
+	unsigned int nsid;	/* nsid field required for nvme-passthrough */
+	unsigned int lba_shift;	/* lba_shift field required for nvme-passthrough */
 	int real_fd;
 	int fixed_fd;
 	int fileno;
@@ -117,7 +121,7 @@ static struct submitter *submitter;
 static volatile int finish;
 static int stats_running;
 static unsigned long max_iops;
-static long page_size;
+static long t_io_uring_page_size;
 
 static int depth = DEPTH;
 static int batch_submit = BATCH_SUBMIT;
@@ -139,6 +143,7 @@ static int random_io = 1;	/* random or sequential IO */
 static int register_ring = 1;	/* register ring */
 static int use_sync = 0;	/* use preadv2 */
 static int numa_placement = 0;	/* set to node of device */
+static int pt = 0;		/* passthrough I/O or not */
 
 static unsigned long tsc_rate;
 
@@ -161,6 +166,54 @@ struct io_uring_map_buffers {
 };
 #endif
 
+static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
+			 enum nvme_csi csi, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode         = nvme_admin_identify,
+		.nsid           = nsid,
+		.addr           = (__u64)(uintptr_t)data,
+		.data_len       = NVME_IDENTIFY_DATA_SIZE,
+		.cdw10          = cns,
+		.cdw11          = csi << NVME_IDENTIFY_CSI_SHIFT,
+		.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+	};
+
+	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+}
+
+static int nvme_get_info(int fd, __u32 *nsid, __u32 *lba_sz, __u64 *nlba)
+{
+	struct nvme_id_ns ns;
+	int namespace_id;
+	int err;
+
+	namespace_id = ioctl(fd, NVME_IOCTL_ID);
+	if (namespace_id < 0) {
+		fprintf(stderr, "error failed to fetch namespace-id\n");
+		close(fd);
+		return -errno;
+	}
+
+	/*
+	 * Identify namespace to get namespace-id, namespace size in LBA's
+	 * and LBA data size.
+	 */
+	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
+				NVME_CSI_NVM, &ns);
+	if (err) {
+		fprintf(stderr, "error failed to fetch identify namespace\n");
+		close(fd);
+		return err;
+	}
+
+	*nsid = namespace_id;
+	*lba_sz = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds;
+	*nlba = ns.nsze;
+
+	return 0;
+}
+
 static unsigned long cycles_to_nsec(unsigned long cycles)
 {
 	uint64_t val;
@@ -195,9 +248,9 @@ static unsigned long plat_idx_to_val(unsigned int idx)
 	return cycles_to_nsec(base + ((k + 0.5) * (1 << error_bits)));
 }
 
-unsigned int calc_clat_percentiles(unsigned long *io_u_plat, unsigned long nr,
-				   unsigned long **output,
-				   unsigned long *maxv, unsigned long *minv)
+unsigned int calculate_clat_percentiles(unsigned long *io_u_plat,
+		unsigned long nr, unsigned long **output,
+		unsigned long *maxv, unsigned long *minv)
 {
 	unsigned long sum = 0;
 	unsigned int len = plist_len, i, j = 0;
@@ -251,7 +304,7 @@ static void show_clat_percentiles(unsigned long *io_u_plat, unsigned long nr,
 	bool is_last;
 	char fmt[32];
 
-	len = calc_clat_percentiles(io_u_plat, nr, &ovals, &maxv, &minv);
+	len = calculate_clat_percentiles(io_u_plat, nr, &ovals, &maxv, &minv);
 	if (!len || !ovals)
 		goto out;
 
@@ -443,13 +496,6 @@ static int io_uring_enter(struct submitter *s, unsigned int to_submit,
 #endif
 }
 
-#ifndef CONFIG_HAVE_GETTID
-static int gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#endif
-
 static unsigned file_depth(struct submitter *s)
 {
 	return (depth + s->nr_files - 1) / s->nr_files;
@@ -520,6 +566,65 @@ static void init_io(struct submitter *s, unsigned index)
 		sqe->user_data |= ((uint64_t)s->clock_index << 32);
 }
 
+static void init_io_pt(struct submitter *s, unsigned index)
+{
+	struct io_uring_sqe *sqe = &s->sqes[index << 1];
+	unsigned long offset;
+	struct file *f;
+	struct nvme_uring_cmd *cmd;
+	unsigned long long slba;
+	unsigned long long nlb;
+	long r;
+
+	if (s->nr_files == 1) {
+		f = &s->files[0];
+	} else {
+		f = &s->files[s->cur_file];
+		if (f->pending_ios >= file_depth(s)) {
+			s->cur_file++;
+			if (s->cur_file == s->nr_files)
+				s->cur_file = 0;
+			f = &s->files[s->cur_file];
+		}
+	}
+	f->pending_ios++;
+
+	if (random_io) {
+		r = __rand64(&s->rand_state);
+		offset = (r % (f->max_blocks - 1)) * bs;
+	} else {
+		offset = f->cur_off;
+		f->cur_off += bs;
+		if (f->cur_off + bs > f->max_size)
+			f->cur_off = 0;
+	}
+
+	if (register_files) {
+		sqe->fd = f->fixed_fd;
+		sqe->flags = IOSQE_FIXED_FILE;
+	} else {
+		sqe->fd = f->real_fd;
+		sqe->flags = 0;
+	}
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->user_data = (unsigned long) f->fileno;
+	if (stats)
+		sqe->user_data |= ((__u64) s->clock_index << 32ULL);
+	sqe->cmd_op = NVME_URING_CMD_IO;
+	slba = offset >> f->lba_shift;
+	nlb = (bs >> f->lba_shift) - 1;
+	cmd = (struct nvme_uring_cmd *)&sqe->cmd;
+	/* cdw10 and cdw11 represent starting slba*/
+	cmd->cdw10 = slba & 0xffffffff;
+	cmd->cdw11 = slba >> 32;
+	/* cdw12 represent number of lba to be read*/
+	cmd->cdw12 = nlb;
+	cmd->addr = (unsigned long) s->iovecs[index].iov_base;
+	cmd->data_len = bs;
+	cmd->nsid = f->nsid;
+	cmd->opcode = 2;
+}
+
 static int prep_more_ios_uring(struct submitter *s, int max_ios)
 {
 	struct io_sq_ring *ring = &s->sq_ring;
@@ -532,7 +637,10 @@ static int prep_more_ios_uring(struct submitter *s, int max_ios)
 			break;
 
 		index = tail & sq_ring_mask;
-		init_io(s, index);
+		if (pt)
+			init_io_pt(s, index);
+		else
+			init_io(s, index);
 		ring->array[index] = index;
 		prepped++;
 		tail = next_tail;
@@ -549,7 +657,29 @@ static int get_file_size(struct file *f)
 
 	if (fstat(f->real_fd, &st) < 0)
 		return -1;
-	if (S_ISBLK(st.st_mode)) {
+	if (pt) {
+		__u64 nlba;
+		__u32 lbs;
+		int ret;
+
+		if (!S_ISCHR(st.st_mode)) {
+			fprintf(stderr, "passthrough works with only nvme-ns "
+					"generic devices (/dev/ngXnY)\n");
+			return -1;
+		}
+		ret = nvme_get_info(f->real_fd, &f->nsid, &lbs, &nlba);
+		if (ret)
+			return -1;
+		if ((bs % lbs) != 0) {
+			printf("error: bs:%d should be a multiple logical_block_size:%d\n",
+					bs, lbs);
+			return -1;
+		}
+		f->max_blocks = nlba / bs;
+		f->max_size = nlba;
+		f->lba_shift = ilog2(lbs);
+		return 0;
+	} else if (S_ISBLK(st.st_mode)) {
 		unsigned long long bytes;
 
 		if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
@@ -620,6 +750,60 @@ static int reap_events_uring(struct submitter *s)
 	return reaped;
 }
 
+static int reap_events_uring_pt(struct submitter *s)
+{
+	struct io_cq_ring *ring = &s->cq_ring;
+	struct io_uring_cqe *cqe;
+	unsigned head, reaped = 0;
+	int last_idx = -1, stat_nr = 0;
+	unsigned index;
+	int fileno;
+
+	head = *ring->head;
+	do {
+		struct file *f;
+
+		read_barrier();
+		if (head == atomic_load_acquire(ring->tail))
+			break;
+		index = head & cq_ring_mask;
+		cqe = &ring->cqes[index << 1];
+		fileno = cqe->user_data & 0xffffffff;
+		f = &s->files[fileno];
+		f->pending_ios--;
+
+		if (cqe->res != 0) {
+			printf("io: unexpected ret=%d\n", cqe->res);
+			if (polled && cqe->res == -EINVAL)
+				printf("passthrough doesn't support polled IO\n");
+			return -1;
+		}
+		if (stats) {
+			int clock_index = cqe->user_data >> 32;
+
+			if (last_idx != clock_index) {
+				if (last_idx != -1) {
+					add_stat(s, last_idx, stat_nr);
+					stat_nr = 0;
+				}
+				last_idx = clock_index;
+			}
+			stat_nr++;
+		}
+		reaped++;
+		head++;
+	} while (1);
+
+	if (stat_nr)
+		add_stat(s, last_idx, stat_nr);
+
+	if (reaped) {
+		s->inflight -= reaped;
+		atomic_store_release(ring->head, head);
+	}
+	return reaped;
+}
+
 static void set_affinity(struct submitter *s)
 {
 #ifdef CONFIG_LIBNUMA
@@ -697,6 +881,7 @@ static int setup_ring(struct submitter *s)
 	struct io_uring_params p;
 	int ret, fd;
 	void *ptr;
+	size_t len;
 
 	memset(&p, 0, sizeof(p));
 
@@ -709,6 +894,10 @@ static int setup_ring(struct submitter *s)
 			p.sq_thread_cpu = sq_thread_cpu;
 		}
 	}
+	if (pt) {
+		p.flags |= IORING_SETUP_SQE128;
+		p.flags |= IORING_SETUP_CQE32;
+	}
 
 	fd = io_uring_setup(depth, &p);
 	if (fd < 0) {
@@ -761,11 +950,22 @@ static int setup_ring(struct submitter *s)
 	sring->array = ptr + p.sq_off.array;
 	sq_ring_mask = *sring->ring_mask;
 
-	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+	if (p.flags & IORING_SETUP_SQE128)
+		len = 2 * p.sq_entries * sizeof(struct io_uring_sqe);
+	else
+		len = p.sq_entries * sizeof(struct io_uring_sqe);
+	s->sqes = mmap(0, len,
 			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
 			IORING_OFF_SQES);
 
-	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
+	if (p.flags & IORING_SETUP_CQE32) {
+		len = p.cq_off.cqes +
+			2 * p.cq_entries * sizeof(struct io_uring_cqe);
+	} else {
+		len = p.cq_off.cqes +
+			p.cq_entries * sizeof(struct io_uring_cqe);
+	}
+	ptr = mmap(0, len,
 			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
 			IORING_OFF_CQ_RING);
 	cring->head = ptr + p.cq_off.head;
@@ -786,7 +986,7 @@ static void *allocate_mem(struct submitter *s, int size)
 		return numa_alloc_onnode(size, s->numa_node);
 #endif
 
-	if (posix_memalign(&buf, page_size, bs)) {
+	if (posix_memalign(&buf, t_io_uring_page_size, bs)) {
 		printf("failed alloc\n");
 		return NULL;
 	}
@@ -855,7 +1055,16 @@ static int submitter_init(struct submitter *s)
 		s->plat = NULL;
 		nr_batch = 0;
 	}
+	/* perform the expensive command initialization part for passthrough here
+	 * rather than in the fast path
+	 */
+	if (pt) {
+		for (i = 0; i < roundup_pow2(depth); i++) {
+			struct io_uring_sqe *sqe = &s->sqes[i << 1];
 
+			memset(&sqe->cmd, 0, sizeof(struct nvme_uring_cmd));
+		}
+	}
 	return nr_batch;
 }
 
@@ -1111,7 +1320,10 @@ submit:
 		do {
 			int r;
 
-			r = reap_events_uring(s);
+			if (pt)
+				r = reap_events_uring_pt(s);
+			else
+				r = reap_events_uring(s);
 			if (r == -1) {
 				s->finish = 1;
 				break;
@@ -1305,11 +1517,12 @@ static void usage(char *argv, int status)
 		" -a <bool> : Use legacy aio, default %d\n"
 		" -S <bool> : Use sync IO (preadv2), default %d\n"
 		" -X <bool> : Use registered ring %d\n"
-		" -P <bool> : Automatically place on device home node %d\n",
+		" -P <bool> : Automatically place on device home node %d\n"
+		" -u <bool> : Use nvme-passthrough I/O, default %d\n",
 		argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
 		fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
 		stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio,
-		use_sync, register_ring, numa_placement);
+		use_sync, register_ring, numa_placement, pt);
 	exit(status);
 }
 
@@ -1368,7 +1581,7 @@ int main(int argc, char *argv[])
 	if (!do_nop && argc < 2)
 		usage(argv[0], 1);
 
-	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:P:h?")) != -1) {
+	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:P:u:h?")) != -1) {
 		switch (opt) {
 		case 'a':
 			aio = !!atoi(optarg);
@@ -1449,6 +1662,9 @@ int main(int argc, char *argv[])
 		case 'P':
 			numa_placement = !!atoi(optarg);
 			break;
+		case 'u':
+			pt = !!atoi(optarg);
+			break;
 		case 'h':
 		case '?':
 		default:
@@ -1542,9 +1758,9 @@ int main(int argc, char *argv[])
 
 	arm_sig_int();
 
-	page_size = sysconf(_SC_PAGESIZE);
-	if (page_size < 0)
-		page_size = 4096;
+	t_io_uring_page_size = sysconf(_SC_PAGESIZE);
+	if (t_io_uring_page_size < 0)
+		t_io_uring_page_size = 4096;
 
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
diff --git a/t/jobs/t0018.fio b/t/jobs/t0018.fio
new file mode 100644
index 00000000..e2298b1f
--- /dev/null
+++ b/t/jobs/t0018.fio
@@ -0,0 +1,9 @@
+# Expected result: job completes without error
+# Buggy result: job fails
+
+[test]
+ioengine=io_uring
+filesize=256K
+time_based
+runtime=3s
+rw=randrw
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 504b7cdb..1e5e9f24 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -582,6 +582,7 @@ class Requirements(object):
 
     _linux = False
     _libaio = False
+    _io_uring = False
     _zbd = False
     _root = False
     _zoned_nullb = False
@@ -605,6 +606,12 @@ class Requirements(object):
                 Requirements._zbd = "CONFIG_HAS_BLKZONED" in contents
                 Requirements._libaio = "CONFIG_LIBAIO" in contents
 
+            contents, success = FioJobTest.get_file("/proc/kallsyms")
+            if not success:
+                print("Unable to open '/proc/kallsyms' to probe for io_uring support")
+            else:
+                Requirements._io_uring = "io_uring_setup" in contents
+
             Requirements._root = (os.geteuid() == 0)
             if Requirements._zbd and Requirements._root:
                 try:
@@ -627,6 +634,7 @@ class Requirements(object):
 
         req_list = [Requirements.linux,
                     Requirements.libaio,
+                    Requirements.io_uring,
                     Requirements.zbd,
                     Requirements.root,
                     Requirements.zoned_nullb,
@@ -648,6 +656,11 @@ class Requirements(object):
         """Is libaio available?"""
         return Requirements._libaio, "libaio required"
 
+    @classmethod
+    def io_uring(cls):
+        """Is io_uring available?"""
+        return Requirements._io_uring, "io_uring required"
+
     @classmethod
     def zbd(cls):
         """Is ZBD support available?"""
@@ -867,6 +880,15 @@ TEST_LIST = [
         'output_format':    'json',
         'requirements':     [Requirements.not_windows],
     },
+    {
+        'test_id':          18,
+        'test_class':       FioJobTest,
+        'job':              't0018.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'requirements':     [Requirements.linux, Requirements.io_uring],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-26 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-26 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 05ef0e4e822ffa81d6e92ed538d32cc37a907279:

  Merge branch 'master' of https://github.com/kraj/fio (2022-08-24 20:09:29 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c27ae7ae6c3d9108bba80ff71cf36bf7fc8b34c9:

  engines/io_uring: delete debug code (2022-08-25 11:19:34 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      engines/io_uring: delete debug code

 engines/io_uring.c | 6 ------
 1 file changed, 6 deletions(-)

---

Diff of recent changes:

diff --git a/engines/io_uring.c b/engines/io_uring.c
index 89d64b06..94376efa 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -445,18 +445,12 @@ static struct io_u *fio_ioring_event(struct thread_data *td, int event)
 	struct io_uring_cqe *cqe;
 	struct io_u *io_u;
 	unsigned index;
-	static int eio;
 
 	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
 	cqe = &ld->cq_ring.cqes[index];
 	io_u = (struct io_u *) (uintptr_t) cqe->user_data;
 
-	if (eio++ == 5) {
-		printf("mark EIO\n");
-		cqe->res = -EIO;
-	}
-
 	if (cqe->res != io_u->xfer_buflen) {
 		if (cqe->res > io_u->xfer_buflen)
 			io_u->error = -cqe->res;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-25 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-25 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 995c45c08c7a362ae0fb2e54e2de27b555a757ab:

  Merge branch 'sigbreak-wait' of github.com:bjpaupor/fio (2022-08-23 17:09:25 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 05ef0e4e822ffa81d6e92ed538d32cc37a907279:

  Merge branch 'master' of https://github.com/kraj/fio (2022-08-24 20:09:29 -0600)

----------------------------------------------------------------
Bart Van Assche (1):
      Enable CPU affinity support on Android

Jens Axboe (3):
      engines/io_uring: pass back correct error value when interrupted
      Merge branch 'master' of https://github.com/bvanassche/fio
      Merge branch 'master' of https://github.com/kraj/fio

Khem Raj (1):
      io_uring: Replace pthread_self with s->tid

 engines/io_uring.c |  8 ++++++++
 os/os-android.h    | 26 ++++++++++++++++++++++++++
 t/io_uring.c       |  5 ++---
 3 files changed, 36 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/engines/io_uring.c b/engines/io_uring.c
index cffc7371..89d64b06 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -445,12 +445,18 @@ static struct io_u *fio_ioring_event(struct thread_data *td, int event)
 	struct io_uring_cqe *cqe;
 	struct io_u *io_u;
 	unsigned index;
+	static int eio;
 
 	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
 	cqe = &ld->cq_ring.cqes[index];
 	io_u = (struct io_u *) (uintptr_t) cqe->user_data;
 
+	if (eio++ == 5) {
+		printf("mark EIO\n");
+		cqe->res = -EIO;
+	}
+
 	if (cqe->res != io_u->xfer_buflen) {
 		if (cqe->res > io_u->xfer_buflen)
 			io_u->error = -cqe->res;
@@ -532,6 +538,7 @@ static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
 			if (r < 0) {
 				if (errno == EAGAIN || errno == EINTR)
 					continue;
+				r = -errno;
 				td_verror(td, errno, "io_uring_enter");
 				break;
 			}
@@ -665,6 +672,7 @@ static int fio_ioring_commit(struct thread_data *td)
 				usleep(1);
 				continue;
 			}
+			ret = -errno;
 			td_verror(td, errno, "io_uring_enter submit");
 			break;
 		}
diff --git a/os/os-android.h b/os/os-android.h
index 2f73d249..34534239 100644
--- a/os/os-android.h
+++ b/os/os-android.h
@@ -24,6 +24,7 @@
   #define __has_builtin(x) 0  // Compatibility with non-clang compilers.
 #endif
 
+#define FIO_HAVE_CPU_AFFINITY
 #define FIO_HAVE_DISK_UTIL
 #define FIO_HAVE_IOSCHED_SWITCH
 #define FIO_HAVE_IOPRIO
@@ -44,6 +45,13 @@
 
 #define OS_MAP_ANON		MAP_ANONYMOUS
 
+typedef cpu_set_t os_cpu_mask_t;
+
+#define fio_setaffinity(pid, cpumask)		\
+	sched_setaffinity((pid), sizeof(cpumask), &(cpumask))
+#define fio_getaffinity(pid, ptr)	\
+	sched_getaffinity((pid), sizeof(cpu_set_t), (ptr))
+
 #ifndef POSIX_MADV_DONTNEED
 #define posix_madvise   madvise
 #define POSIX_MADV_DONTNEED MADV_DONTNEED
@@ -64,6 +72,24 @@
 	pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
 #endif
 
+#define fio_cpu_clear(mask, cpu)	CPU_CLR((cpu), (mask))
+#define fio_cpu_set(mask, cpu)		CPU_SET((cpu), (mask))
+#define fio_cpu_isset(mask, cpu)	(CPU_ISSET((cpu), (mask)) != 0)
+#define fio_cpu_count(mask)		CPU_COUNT((mask))
+
+static inline int fio_cpuset_init(os_cpu_mask_t *mask)
+{
+	CPU_ZERO(mask);
+	return 0;
+}
+
+static inline int fio_cpuset_exit(os_cpu_mask_t *mask)
+{
+	return 0;
+}
+
+#define FIO_MAX_CPUS			CPU_SETSIZE
+
 #ifndef CONFIG_NO_SHM
 /*
  * Bionic doesn't support SysV shared memory, so implement it using ashmem
diff --git a/t/io_uring.c b/t/io_uring.c
index 35bf1956..f34a3554 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -799,15 +799,14 @@ static int submitter_init(struct submitter *s)
 	int i, nr_batch, err;
 	static int init_printed;
 	char buf[80];
-
 	s->tid = gettid();
 	printf("submitter=%d, tid=%d, file=%s, node=%d\n", s->index, s->tid,
 							s->filename, s->numa_node);
 
 	set_affinity(s);
 
-	__init_rand64(&s->rand_state, pthread_self());
-	srand48(pthread_self());
+	__init_rand64(&s->rand_state, s->tid);
+	srand48(s->tid);
 
 	for (i = 0; i < MAX_FDS; i++)
 		s->files[i].fileno = i;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-24 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-24 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d33c7846cc5f175177e194a5489282780e2a04c4:

  Merge branch 'clarify-io-errors' of https://github.com/Hi-Angel/fio (2022-08-16 19:54:17 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 995c45c08c7a362ae0fb2e54e2de27b555a757ab:

  Merge branch 'sigbreak-wait' of github.com:bjpaupor/fio (2022-08-23 17:09:25 -0400)

----------------------------------------------------------------
Brandon Paupore (1):
      Add wait for handling SIGBREAK

Vincent Fu (3):
      Revert "Minor style fixups"
      Revert "Fix multithread issues when operating on a single shared file"
      Merge branch 'sigbreak-wait' of github.com:bjpaupor/fio

 backend.c   | 40 +++++++++++++++++++++-------------------
 file.h      |  1 -
 filesetup.c | 45 ++-------------------------------------------
 3 files changed, 23 insertions(+), 63 deletions(-)

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index 5159b60d..375a23e4 100644
--- a/backend.c
+++ b/backend.c
@@ -90,6 +90,25 @@ static void sig_int(int sig)
 	}
 }
 
+#ifdef WIN32
+static void sig_break(int sig)
+{
+	struct thread_data *td;
+	int i;
+
+	sig_int(sig);
+
+	/**
+	 * Windows terminates all job processes on SIGBREAK after the handler
+	 * returns, so give them time to wrap-up and give stats
+	 */
+	for_each_td(td, i) {
+		while (td->runstate < TD_EXITED)
+			sleep(1);
+	}
+}
+#endif
+
 void sig_show_status(int sig)
 {
 	show_running_run_stats();
@@ -112,7 +131,7 @@ static void set_sig_handlers(void)
 /* Windows uses SIGBREAK as a quit signal from other applications */
 #ifdef WIN32
 	memset(&act, 0, sizeof(act));
-	act.sa_handler = sig_int;
+	act.sa_handler = sig_break;
 	act.sa_flags = SA_RESTART;
 	sigaction(SIGBREAK, &act, NULL);
 #endif
@@ -2314,25 +2333,8 @@ static void run_threads(struct sk_out *sk_out)
 	for_each_td(td, i) {
 		print_status_init(td->thread_number - 1);
 
-		if (!td->o.create_serialize) {
-			/*
-			 *  When operating on a single rile in parallel,
-			 *  perform single-threaded early setup so that
-			 *  when setup_files() does not run into issues
-			 *  later.
-			*/
-			if (!i && td->o.nr_files == 1) {
-				if (setup_shared_file(td)) {
-					exit_value++;
-					if (td->error)
-						log_err("fio: pid=%d, err=%d/%s\n",
-							(int) td->pid, td->error, td->verror);
-					td_set_runstate(td, TD_REAPED);
-					todo--;
-				}
-			}
+		if (!td->o.create_serialize)
 			continue;
-		}
 
 		if (fio_verify_load_state(td))
 			goto reap;
diff --git a/file.h b/file.h
index e646cf22..da1b8947 100644
--- a/file.h
+++ b/file.h
@@ -201,7 +201,6 @@ struct thread_data;
 extern void close_files(struct thread_data *);
 extern void close_and_free_files(struct thread_data *);
 extern uint64_t get_start_offset(struct thread_data *, struct fio_file *);
-extern int __must_check setup_shared_file(struct thread_data *);
 extern int __must_check setup_files(struct thread_data *);
 extern int __must_check file_invalidate_cache(struct thread_data *, struct fio_file *);
 #ifdef __cplusplus
diff --git a/filesetup.c b/filesetup.c
index 3e2ccf9b..1d3cc5ad 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -143,7 +143,7 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 	if (unlink_file || new_layout) {
 		int ret;
 
-		dprint(FD_FILE, "layout %d unlink %d %s\n", new_layout, unlink_file, f->file_name);
+		dprint(FD_FILE, "layout unlink %s\n", f->file_name);
 
 		ret = td_io_unlink_file(td, f);
 		if (ret != 0 && ret != ENOENT) {
@@ -198,9 +198,6 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 		}
 	}
 
-
-	dprint(FD_FILE, "fill file %s, size %llu\n", f->file_name, (unsigned long long) f->real_file_size);
-
 	left = f->real_file_size;
 	bs = td->o.max_bs[DDIR_WRITE];
 	if (bs > left)
@@ -1081,44 +1078,6 @@ static bool create_work_dirs(struct thread_data *td, const char *fname)
 	return true;
 }
 
-int setup_shared_file(struct thread_data *td)
-{
-	struct fio_file *f;
-	uint64_t file_size;
-	int err = 0;
-
-	if (td->o.nr_files > 1) {
-		log_err("fio: shared file setup called for multiple files\n");
-		return -1;
-	}
-
-	get_file_sizes(td);
-
-	f = td->files[0];
-
-	if (f == NULL) {
-		log_err("fio: NULL shared file\n");
-		return -1;
-	}
-
-	file_size = thread_number * td->o.size;
-	dprint(FD_FILE, "shared setup %s real_file_size=%llu, desired=%llu\n", 
-			f->file_name, (unsigned long long)f->real_file_size, (unsigned long long)file_size);
-
-	if (f->real_file_size < file_size) {
-		dprint(FD_FILE, "fio: extending shared file\n");
-		f->real_file_size = file_size;
-		err = extend_file(td, f);
-		if (!err)
-			err = __file_invalidate_cache(td, f, 0, f->real_file_size);
-		get_file_sizes(td);
-		dprint(FD_FILE, "shared setup new real_file_size=%llu\n", 
-				(unsigned long long)f->real_file_size);
-	}
-
-	return err;
-}
-
 /*
  * Open the files and setup files sizes, creating files if necessary.
  */
@@ -1133,7 +1092,7 @@ int setup_files(struct thread_data *td)
 	const unsigned long long bs = td_min_bs(td);
 	uint64_t fs = 0;
 
-	dprint(FD_FILE, "setup files (thread_number=%d, subjob_number=%d)\n", td->thread_number, td->subjob_number);
+	dprint(FD_FILE, "setup files\n");
 
 	old_state = td_bump_runstate(td, TD_SETTING_UP);
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-17 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-17 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit eeb302f9bfa4bbe121cae2a12a679c888164fc93:

  README: link to GitHub releases for Windows (2022-08-15 10:37:57 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d33c7846cc5f175177e194a5489282780e2a04c4:

  Merge branch 'clarify-io-errors' of https://github.com/Hi-Angel/fio (2022-08-16 19:54:17 -0600)

----------------------------------------------------------------
Ankit Kumar (2):
      engines/xnvme: fix segfault issue with xnvme ioengine
      doc: update fio doc for xnvme engine

Jens Axboe (1):
      Merge branch 'clarify-io-errors' of https://github.com/Hi-Angel/fio

Konstantin Kharlamov (2):
      doc: get rid of trailing whitespace
      doc: clarify that I/O errors may go unnoticed without direct=1

Vincent Fu (2):
      test: add latency test using posixaio ioengine
      test: fix hash for t0016

 HOWTO.rst                                        | 48 +++++++++++++++------
 engines/xnvme.c                                  | 17 ++++++--
 fio.1                                            | 54 ++++++++++++++++--------
 t/jobs/{t0016-259ebc00.fio => t0016-d54ae22.fio} |  0
 t/jobs/t0017.fio                                 |  9 ++++
 t/run-fio-tests.py                               | 12 +++++-
 6 files changed, 105 insertions(+), 35 deletions(-)
 rename t/jobs/{t0016-259ebc00.fio => t0016-d54ae22.fio} (100%)
 create mode 100644 t/jobs/t0017.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 05fc117f..08be687c 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1301,7 +1301,7 @@ I/O type
 	effectively caps the file size at `real_size - offset`. Can be combined with
 	:option:`size` to constrain the start and end range of the I/O workload.
 	A percentage can be specified by a number between 1 and 100 followed by '%',
-	for example, ``offset=20%`` to specify 20%. In ZBD mode, value can be set as 
+	for example, ``offset=20%`` to specify 20%. In ZBD mode, value can be set as
         number of zones using 'z'.
 
 .. option:: offset_align=int
@@ -1877,7 +1877,7 @@ I/O size
 	If this option is not specified, fio will use the full size of the given
 	files or devices.  If the files do not exist, size must be given. It is also
 	possible to give size as a percentage between 1 and 100. If ``size=20%`` is
-	given, fio will use 20% of the full size of the given files or devices. 
+	given, fio will use 20% of the full size of the given files or devices.
 	In ZBD mode, value can also be set as number of zones using 'z'.
 	Can be combined with :option:`offset` to constrain the start and end range
 	that I/O will be done within.
@@ -2780,41 +2780,56 @@ with the caveat that when used on the command line, they must come after the
 	Select the xnvme async command interface. This can take these values.
 
 	**emu**
-		This is default and used to emulate asynchronous I/O.
+		This is default and use to emulate asynchronous I/O by using a
+		single thread to create a queue pair on top of a synchronous
+		I/O interface using the NVMe driver IOCTL.
 	**thrpool**
-		Use thread pool for Asynchronous I/O.
+		Emulate an asynchronous I/O interface with a pool of userspace
+		threads on top of a synchronous I/O interface using the NVMe
+		driver IOCTL. By default four threads are used.
 	**io_uring**
-		Use Linux io_uring/liburing for Asynchronous I/O.
+		Linux native asynchronous I/O interface which supports both
+		direct and buffered I/O.
+	**io_uring_cmd**
+		Fast Linux native asynchronous I/O interface for NVMe pass
+		through commands. This only works with NVMe character device
+		(/dev/ngXnY).
 	**libaio**
 		Use Linux aio for Asynchronous I/O.
 	**posix**
-		Use POSIX aio for Asynchronous I/O.
+		Use the posix asynchronous I/O interface to perform one or
+		more I/O operations asynchronously.
 	**nil**
-		Use nil-io; For introspective perf. evaluation
+		Do not transfer any data; just pretend to. This is mainly used
+		for introspective performance evaluation.
 
 .. option:: xnvme_sync=str : [xnvme]
 
 	Select the xnvme synchronous command interface. This can take these values.
 
 	**nvme**
-		This is default and uses Linux NVMe Driver ioctl() for synchronous I/O.
+		This is default and uses Linux NVMe Driver ioctl() for
+		synchronous I/O.
 	**psync**
-		Use pread()/write() for synchronous I/O.
+		This supports regular as well as vectored pread() and pwrite()
+		commands.
+	**block**
+		This is the same as psync except that it also supports zone
+		management commands using Linux block layer IOCTLs.
 
 .. option:: xnvme_admin=str : [xnvme]
 
 	Select the xnvme admin command interface. This can take these values.
 
 	**nvme**
-		This is default and uses linux NVMe Driver ioctl() for admin commands.
+		This is default and uses linux NVMe Driver ioctl() for admin
+		commands.
 	**block**
 		Use Linux Block Layer ioctl() and sysfs for admin commands.
-	**file_as_ns**
-		Use file-stat to construct NVMe idfy responses.
 
 .. option:: xnvme_dev_nsid=int : [xnvme]
 
-	xnvme namespace identifier, for userspace NVMe driver.
+	xnvme namespace identifier for userspace NVMe driver, such as SPDK.
 
 .. option:: xnvme_iovec=int : [xnvme]
 
@@ -3912,6 +3927,13 @@ Error handling
 	appended, the total error count and the first error. The error field given
 	in the stats is the first error that was hit during the run.
 
+	Note: a write error from the device may go unnoticed by fio when using
+	buffered IO, as the write() (or similar) system call merely dirties the
+	kernel pages, unless :option:`sync` or :option:`direct` is used. Device IO
+	errors occur when the dirty data is actually written out to disk. If fully
+	sync writes aren't desirable, :option:`fsync` or :option:`fdatasync` can be
+	used as well. This is specific to writes, as reads are always synchronous.
+
 	The allowed values are:
 
 		**none**
diff --git a/engines/xnvme.c b/engines/xnvme.c
index c11b33a8..d8647481 100644
--- a/engines/xnvme.c
+++ b/engines/xnvme.c
@@ -205,9 +205,14 @@ static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
 
 static void xnvme_fioe_cleanup(struct thread_data *td)
 {
-	struct xnvme_fioe_data *xd = td->io_ops_data;
+	struct xnvme_fioe_data *xd = NULL;
 	int err;
 
+	if (!td->io_ops_data)
+		return;
+
+	xd = td->io_ops_data;
+
 	err = pthread_mutex_lock(&g_serialize);
 	if (err)
 		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
@@ -367,8 +372,14 @@ static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
 /* NOTE: using the first device for buffer-allocators) */
 static void xnvme_fioe_iomem_free(struct thread_data *td)
 {
-	struct xnvme_fioe_data *xd = td->io_ops_data;
-	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
+	struct xnvme_fioe_data *xd = NULL;
+	struct xnvme_fioe_fwrap *fwrap = NULL;
+
+	if (!td->io_ops_data)
+		return;
+
+	xd = td->io_ops_data;
+	fwrap = &xd->files[0];
 
 	if (!fwrap->dev) {
 		log_err("ioeng->iomem_free(): failed no dev-handle\n");
diff --git a/fio.1 b/fio.1
index 6630525f..27454b0b 100644
--- a/fio.1
+++ b/fio.1
@@ -292,7 +292,7 @@ For Zone Block Device Mode:
 .RS
 .P
 .PD 0
-z means Zone 
+z means Zone
 .P
 .PD
 .RE
@@ -1083,7 +1083,7 @@ provided. Data before the given offset will not be touched. This
 effectively caps the file size at `real_size \- offset'. Can be combined with
 \fBsize\fR to constrain the start and end range of the I/O workload.
 A percentage can be specified by a number between 1 and 100 followed by '%',
-for example, `offset=20%' to specify 20%. In ZBD mode, value can be set as 
+for example, `offset=20%' to specify 20%. In ZBD mode, value can be set as
 number of zones using 'z'.
 .TP
 .BI offset_align \fR=\fPint
@@ -1099,7 +1099,7 @@ specified). This option is useful if there are several jobs which are
 intended to operate on a file in parallel disjoint segments, with even
 spacing between the starting points. Percentages can be used for this option.
 If a percentage is given, the generated offset will be aligned to the minimum
-\fBblocksize\fR or to the value of \fBoffset_align\fR if provided.In ZBD mode, value 
+\fBblocksize\fR or to the value of \fBoffset_align\fR if provided. In ZBD mode, value
 can be set as number of zones using 'z'.
 .TP
 .BI number_ios \fR=\fPint
@@ -1678,7 +1678,7 @@ If this option is not specified, fio will use the full size of the given
 files or devices. If the files do not exist, size must be given. It is also
 possible to give size as a percentage between 1 and 100. If `size=20%' is
 given, fio will use 20% of the full size of the given files or devices. In ZBD mode,
-size can be given in units of number of zones using 'z'. Can be combined with \fBoffset\fR to 
+size can be given in units of number of zones using 'z'. Can be combined with \fBoffset\fR to
 constrain the start and end range that I/O will be done within.
 .TP
 .BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
@@ -1697,7 +1697,7 @@ also be set as number of zones using 'z'.
 .BI filesize \fR=\fPirange(int)
 Individual file sizes. May be a range, in which case fio will select sizes
 for files at random within the given range. If not given, each created file
-is the same size. This option overrides \fBsize\fR in terms of file size, 
+is the same size. This option overrides \fBsize\fR in terms of file size,
 i.e. \fBsize\fR becomes merely the default for \fBio_size\fR (and
 has no effect it all if \fBio_size\fR is set explicitly).
 .TP
@@ -2530,22 +2530,29 @@ Select the xnvme async command interface. This can take these values.
 .RS
 .TP
 .B emu
-This is default and used to emulate asynchronous I/O
+This is default and used to emulate asynchronous I/O by using a single thread to
+create a queue pair on top of a synchronous I/O interface using the NVMe driver
+IOCTL.
 .TP
 .BI thrpool
-Use thread pool for Asynchronous I/O
+Emulate an asynchronous I/O interface with a pool of userspace threads on top
+of a synchronous I/O interface using the NVMe driver IOCTL. By default four
+threads are used.
 .TP
 .BI io_uring
-Use Linux io_uring/liburing for Asynchronous I/O
+Linux native asynchronous I/O interface which supports both direct and buffered
+I/O.
 .TP
 .BI libaio
 Use Linux aio for Asynchronous I/O
 .TP
 .BI posix
-Use POSIX aio for Asynchronous I/O
+Use the POSIX asynchronous I/O interface to perform one or more I/O operations
+asynchronously.
 .TP
 .BI nil
-Use nil-io; For introspective perf. evaluation
+Do not transfer any data; just pretend to. This is mainly used for
+introspective performance evaluation.
 .RE
 .RE
 .TP
@@ -2555,10 +2562,14 @@ Select the xnvme synchronous command interface. This can take these values.
 .RS
 .TP
 .B nvme
-This is default and uses Linux NVMe Driver ioctl() for synchronous I/O
+This is default and uses Linux NVMe Driver ioctl() for synchronous I/O.
 .TP
 .BI psync
-Use pread()/write() for synchronous I/O
+This supports regular as well as vectored pread() and pwrite() commands.
+.TP
+.BI block
+This is the same as psync except that it also supports zone management
+commands using Linux block layer IOCTLs.
 .RE
 .RE
 .TP
@@ -2568,18 +2579,15 @@ Select the xnvme admin command interface. This can take these values.
 .RS
 .TP
 .B nvme
-This is default and uses Linux NVMe Driver ioctl() for admin commands
+This is default and uses Linux NVMe Driver ioctl() for admin commands.
 .TP
 .BI block
-Use Linux Block Layer ioctl() and sysfs for admin commands
-.TP
-.BI file_as_ns
-Use file-stat as to construct NVMe idfy responses
+Use Linux Block Layer ioctl() and sysfs for admin commands.
 .RE
 .RE
 .TP
 .BI (xnvme)xnvme_dev_nsid\fR=\fPint
-xnvme namespace identifier, for userspace NVMe driver.
+xnvme namespace identifier for userspace NVMe driver such as SPDK.
 .TP
 .BI (xnvme)xnvme_iovec
 If this option is set, xnvme will use vectored read/write commands.
@@ -3598,6 +3606,16 @@ EILSEQ) until the runtime is exceeded or the I/O size specified is
 completed. If this option is used, there are two more stats that are
 appended, the total error count and the first error. The error field given
 in the stats is the first error that was hit during the run.
+.RS
+.P
+Note: a write error from the device may go unnoticed by fio when using buffered
+IO, as the write() (or similar) system call merely dirties the kernel pages,
+unless `sync' or `direct' is used. Device IO errors occur when the dirty data is
+actually written out to disk. If fully sync writes aren't desirable, `fsync' or
+`fdatasync' can be used as well. This is specific to writes, as reads are always
+synchronous.
+.RS
+.P
 The allowed values are:
 .RS
 .RS
diff --git a/t/jobs/t0016-259ebc00.fio b/t/jobs/t0016-d54ae22.fio
similarity index 100%
rename from t/jobs/t0016-259ebc00.fio
rename to t/jobs/t0016-d54ae22.fio
diff --git a/t/jobs/t0017.fio b/t/jobs/t0017.fio
new file mode 100644
index 00000000..14486d98
--- /dev/null
+++ b/t/jobs/t0017.fio
@@ -0,0 +1,9 @@
+# Expected result: mean(slat) + mean(clat) = mean(lat)
+# Buggy result: equality does not hold
+# This is similar to t0015 and t0016 except that it uses posixaio which is
+# available on more platforms and does not have a commit hook
+
+[test]
+ioengine=posixaio
+size=1M
+iodepth=16
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index d77f20e0..504b7cdb 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -850,13 +850,23 @@ TEST_LIST = [
     {
         'test_id':          16,
         'test_class':       FioJobTest_t0015,
-        'job':              't0016-259ebc00.fio',
+        'job':              't0016-d54ae22.fio',
         'success':          SUCCESS_DEFAULT,
         'pre_job':          None,
         'pre_success':      None,
         'output_format':    'json',
         'requirements':     [],
     },
+    {
+        'test_id':          17,
+        'test_class':       FioJobTest_t0015,
+        'job':              't0017.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [Requirements.not_windows],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-16 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-16 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 7a7bcae0610d872951bc22dc310105c7ec1157af:

  Merge branch 's3_crypto' of github.com:hualongfeng/fio (2022-08-11 15:39:02 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to eeb302f9bfa4bbe121cae2a12a679c888164fc93:

  README: link to GitHub releases for Windows (2022-08-15 10:37:57 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      README: link to GitHub releases for Windows

 README.rst | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/README.rst b/README.rst
index 67420903..79582dea 100644
--- a/README.rst
+++ b/README.rst
@@ -123,10 +123,12 @@ Solaris:
 	``pkgutil -i fio``.
 
 Windows:
-	Rebecca Cran <rebecca@bsdio.com> has fio packages for Windows at
-	https://bsdio.com/fio/ . The latest builds for Windows can also
-	be grabbed from https://ci.appveyor.com/project/axboe/fio by clicking
-	the latest x86 or x64 build, then selecting the ARTIFACTS tab.
+        Beginning with fio 3.31 Windows installers are available on GitHub at
+        https://github.com/axboe/fio/releases.  Rebecca Cran
+        <rebecca@bsdio.com> has fio packages for Windows at
+        https://bsdio.com/fio/ . The latest builds for Windows can also be
+        grabbed from https://ci.appveyor.com/project/axboe/fio by clicking the
+        latest x86 or x64 build and then selecting the Artifacts tab.
 
 BSDs:
 	Packages for BSDs may be available from their binary package repositories.

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-12 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-12 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 9dc528b1638b625b5e167983a74de4e85c5859ea:

  lib/rand: get rid of unused MAX_SEED_BUCKETS (2022-08-10 09:51:49 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 7a7bcae0610d872951bc22dc310105c7ec1157af:

  Merge branch 's3_crypto' of github.com:hualongfeng/fio (2022-08-11 15:39:02 -0400)

----------------------------------------------------------------
Feng, Hualong (3):
      engines/http: Add storage class option for s3
      engines/http: Add s3 crypto options for s3
      doc: Add usage and example about s3 storage class and crypto

Friendy.Su@sony.com (1):
      ioengines: merge filecreate, filestat, filedelete engines to fileoperations.c

Vincent Fu (1):
      Merge branch 's3_crypto' of github.com:hualongfeng/fio

 HOWTO.rst                          |  14 ++
 Makefile                           |   2 +-
 engines/filecreate.c               | 118 --------------
 engines/filedelete.c               | 115 --------------
 engines/fileoperations.c           | 318 +++++++++++++++++++++++++++++++++++++
 engines/filestat.c                 | 190 ----------------------
 engines/http.c                     | 178 ++++++++++++++++++---
 examples/http-s3-crypto.fio        |  38 +++++
 examples/http-s3-storage-class.fio |  37 +++++
 fio.1                              |   9 ++
 10 files changed, 577 insertions(+), 442 deletions(-)
 delete mode 100644 engines/filecreate.c
 delete mode 100644 engines/filedelete.c
 create mode 100644 engines/fileoperations.c
 delete mode 100644 engines/filestat.c
 create mode 100644 examples/http-s3-crypto.fio
 create mode 100644 examples/http-s3-storage-class.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 104cce2d..05fc117f 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2692,6 +2692,20 @@ with the caveat that when used on the command line, they must come after the
 
 	The S3 key/access id.
 
+.. option:: http_s3_sse_customer_key=str : [http]
+
+        The customer-provided encryption key for S3 server-side encryption (SSE).
+
+.. option:: http_s3_sse_customer_algorithm=str : [http]
+
+        The customer-provided encryption algorithm for S3 server-side encryption (SSE).
+        Default is **AES256**
+
+.. option:: http_s3_storage_class=str : [http]
+
+        Which storage class to access. User-customizable settings.
+        Default is **STANDARD**
+
 .. option:: http_swift_auth_token=str : [http]
 
 	The Swift auth token. See the example configuration file on how
diff --git a/Makefile b/Makefile
index 188a74d7..634d2c93 100644
--- a/Makefile
+++ b/Makefile
@@ -56,7 +56,7 @@ SOURCE :=	$(sort $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/crc/*.c)) \
 		pshared.c options.c \
 		smalloc.c filehash.c profile.c debug.c engines/cpu.c \
 		engines/mmap.c engines/sync.c engines/null.c engines/net.c \
-		engines/ftruncate.c engines/filecreate.c engines/filestat.c engines/filedelete.c \
+		engines/ftruncate.c engines/fileoperations.c \
 		engines/exec.c \
 		server.c client.c iolog.c backend.c libfio.c flow.c cconv.c \
 		gettime-thread.c helpers.c json.c idletime.c td_error.c \
diff --git a/engines/filecreate.c b/engines/filecreate.c
deleted file mode 100644
index 7884752d..00000000
--- a/engines/filecreate.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * filecreate engine
- *
- * IO engine that doesn't do any IO, just creates files and tracks the latency
- * of the file creation.
- */
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include "../fio.h"
-
-struct fc_data {
-	enum fio_ddir stat_ddir;
-};
-
-static int open_file(struct thread_data *td, struct fio_file *f)
-{
-	struct timespec start;
-	int do_lat = !td->o.disable_lat;
-
-	dprint(FD_FILE, "fd open %s\n", f->file_name);
-
-	if (f->filetype != FIO_TYPE_FILE) {
-		log_err("fio: only files are supported\n");
-		return 1;
-	}
-	if (!strcmp(f->file_name, "-")) {
-		log_err("fio: can't read/write to stdin/out\n");
-		return 1;
-	}
-
-	if (do_lat)
-		fio_gettime(&start, NULL);
-
-	f->fd = open(f->file_name, O_CREAT|O_RDWR, 0600);
-
-	if (f->fd == -1) {
-		char buf[FIO_VERROR_SIZE];
-		int e = errno;
-
-		snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
-		td_verror(td, e, buf);
-		return 1;
-	}
-
-	if (do_lat) {
-		struct fc_data *data = td->io_ops_data;
-		uint64_t nsec;
-
-		nsec = ntime_since_now(&start);
-		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
-	}
-
-	return 0;
-}
-
-static enum fio_q_status queue_io(struct thread_data *td,
-				  struct io_u fio_unused *io_u)
-{
-	return FIO_Q_COMPLETED;
-}
-
-/*
- * Ensure that we at least have a block size worth of IO to do for each
- * file. If the job file has td->o.size < nr_files * block_size, then
- * fio won't do anything.
- */
-static int get_file_size(struct thread_data *td, struct fio_file *f)
-{
-	f->real_file_size = td_min_bs(td);
-	return 0;
-}
-
-static int init(struct thread_data *td)
-{
-	struct fc_data *data;
-
-	data = calloc(1, sizeof(*data));
-
-	if (td_read(td))
-		data->stat_ddir = DDIR_READ;
-	else if (td_write(td))
-		data->stat_ddir = DDIR_WRITE;
-
-	td->io_ops_data = data;
-	return 0;
-}
-
-static void cleanup(struct thread_data *td)
-{
-	struct fc_data *data = td->io_ops_data;
-
-	free(data);
-}
-
-static struct ioengine_ops ioengine = {
-	.name		= "filecreate",
-	.version	= FIO_IOOPS_VERSION,
-	.init		= init,
-	.cleanup	= cleanup,
-	.queue		= queue_io,
-	.get_file_size	= get_file_size,
-	.open_file	= open_file,
-	.close_file	= generic_close_file,
-	.flags		= FIO_DISKLESSIO | FIO_SYNCIO | FIO_FAKEIO |
-				FIO_NOSTATS | FIO_NOFILEHASH,
-};
-
-static void fio_init fio_filecreate_register(void)
-{
-	register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_filecreate_unregister(void)
-{
-	unregister_ioengine(&ioengine);
-}
diff --git a/engines/filedelete.c b/engines/filedelete.c
deleted file mode 100644
index df388ac9..00000000
--- a/engines/filedelete.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * file delete engine
- *
- * IO engine that doesn't do any IO, just delete files and track the latency
- * of the file deletion.
- */
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include "../fio.h"
-
-struct fc_data {
-	enum fio_ddir stat_ddir;
-};
-
-static int delete_file(struct thread_data *td, struct fio_file *f)
-{
-	struct timespec start;
-	int do_lat = !td->o.disable_lat;
-	int ret;
-
-	dprint(FD_FILE, "fd delete %s\n", f->file_name);
-
-	if (f->filetype != FIO_TYPE_FILE) {
-		log_err("fio: only files are supported\n");
-		return 1;
-	}
-	if (!strcmp(f->file_name, "-")) {
-		log_err("fio: can't read/write to stdin/out\n");
-		return 1;
-	}
-
-	if (do_lat)
-		fio_gettime(&start, NULL);
-
-	ret = unlink(f->file_name);
-
-	if (ret == -1) {
-		char buf[FIO_VERROR_SIZE];
-		int e = errno;
-
-		snprintf(buf, sizeof(buf), "delete(%s)", f->file_name);
-		td_verror(td, e, buf);
-		return 1;
-	}
-
-	if (do_lat) {
-		struct fc_data *data = td->io_ops_data;
-		uint64_t nsec;
-
-		nsec = ntime_since_now(&start);
-		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
-	}
-
-	return 0;
-}
-
-
-static enum fio_q_status queue_io(struct thread_data *td, struct io_u fio_unused *io_u)
-{
-	return FIO_Q_COMPLETED;
-}
-
-static int init(struct thread_data *td)
-{
-	struct fc_data *data;
-
-	data = calloc(1, sizeof(*data));
-
-	if (td_read(td))
-		data->stat_ddir = DDIR_READ;
-	else if (td_write(td))
-		data->stat_ddir = DDIR_WRITE;
-
-	td->io_ops_data = data;
-	return 0;
-}
-
-static int delete_invalidate(struct thread_data *td, struct fio_file *f)
-{
-    /* do nothing because file not opened */
-    return 0;
-}
-
-static void cleanup(struct thread_data *td)
-{
-	struct fc_data *data = td->io_ops_data;
-
-	free(data);
-}
-
-static struct ioengine_ops ioengine = {
-	.name		= "filedelete",
-	.version	= FIO_IOOPS_VERSION,
-	.init		= init,
-	.invalidate	= delete_invalidate,
-	.cleanup	= cleanup,
-	.queue		= queue_io,
-	.get_file_size	= generic_get_file_size,
-	.open_file	= delete_file,
-	.flags		=  FIO_SYNCIO | FIO_FAKEIO |
-				FIO_NOSTATS | FIO_NOFILEHASH,
-};
-
-static void fio_init fio_filedelete_register(void)
-{
-	register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_filedelete_unregister(void)
-{
-	unregister_ioengine(&ioengine);
-}
diff --git a/engines/fileoperations.c b/engines/fileoperations.c
new file mode 100644
index 00000000..1db60da1
--- /dev/null
+++ b/engines/fileoperations.c
@@ -0,0 +1,318 @@
+/*
+ * fileoperations engine
+ *
+ * IO engine that doesn't do any IO, just operates on files and tracks the latency
+ * of the file operation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "../fio.h"
+#include "../optgroup.h"
+#include "../oslib/statx.h"
+
+
+struct fc_data {
+	enum fio_ddir stat_ddir;
+};
+
+struct filestat_options {
+	void *pad;
+	unsigned int stat_type;
+};
+
+enum {
+	FIO_FILESTAT_STAT	= 1,
+	FIO_FILESTAT_LSTAT	= 2,
+	FIO_FILESTAT_STATX	= 3,
+};
+
+static struct fio_option options[] = {
+	{
+		.name	= "stat_type",
+		.lname	= "stat_type",
+		.type	= FIO_OPT_STR,
+		.off1	= offsetof(struct filestat_options, stat_type),
+		.help	= "Specify stat system call type to measure lookup/getattr performance",
+		.def	= "stat",
+		.posval = {
+			  { .ival = "stat",
+			    .oval = FIO_FILESTAT_STAT,
+			    .help = "Use stat(2)",
+			  },
+			  { .ival = "lstat",
+			    .oval = FIO_FILESTAT_LSTAT,
+			    .help = "Use lstat(2)",
+			  },
+			  { .ival = "statx",
+			    .oval = FIO_FILESTAT_STATX,
+			    .help = "Use statx(2) if exists",
+			  },
+		},
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_FILESTAT,
+	},
+	{
+		.name	= NULL,
+	},
+};
+
+
+static int open_file(struct thread_data *td, struct fio_file *f)
+{
+	struct timespec start;
+	int do_lat = !td->o.disable_lat;
+
+	dprint(FD_FILE, "fd open %s\n", f->file_name);
+
+	if (f->filetype != FIO_TYPE_FILE) {
+		log_err("fio: only files are supported\n");
+		return 1;
+	}
+	if (!strcmp(f->file_name, "-")) {
+		log_err("fio: can't read/write to stdin/out\n");
+		return 1;
+	}
+
+	if (do_lat)
+		fio_gettime(&start, NULL);
+
+	f->fd = open(f->file_name, O_CREAT|O_RDWR, 0600);
+
+	if (f->fd == -1) {
+		char buf[FIO_VERROR_SIZE];
+		int e = errno;
+
+		snprintf(buf, sizeof(buf), "open(%s)", f->file_name);
+		td_verror(td, e, buf);
+		return 1;
+	}
+
+	if (do_lat) {
+		struct fc_data *data = td->io_ops_data;
+		uint64_t nsec;
+
+		nsec = ntime_since_now(&start);
+		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+	}
+
+	return 0;
+}
+
+static int stat_file(struct thread_data *td, struct fio_file *f)
+{
+	struct filestat_options *o = td->eo;
+	struct timespec start;
+	int do_lat = !td->o.disable_lat;
+	struct stat statbuf;
+#ifndef WIN32
+	struct statx statxbuf;
+	char *abspath;
+#endif
+	int ret;
+
+	dprint(FD_FILE, "fd stat %s\n", f->file_name);
+
+	if (f->filetype != FIO_TYPE_FILE) {
+		log_err("fio: only files are supported\n");
+		return 1;
+	}
+	if (!strcmp(f->file_name, "-")) {
+		log_err("fio: can't read/write to stdin/out\n");
+		return 1;
+	}
+
+	if (do_lat)
+		fio_gettime(&start, NULL);
+
+	switch (o->stat_type) {
+	case FIO_FILESTAT_STAT:
+		ret = stat(f->file_name, &statbuf);
+		break;
+	case FIO_FILESTAT_LSTAT:
+		ret = lstat(f->file_name, &statbuf);
+		break;
+	case FIO_FILESTAT_STATX:
+#ifndef WIN32
+		abspath = realpath(f->file_name, NULL);
+		if (abspath) {
+			ret = statx(-1, abspath, 0, STATX_ALL, &statxbuf);
+			free(abspath);
+		} else
+			ret = -1;
+#else
+		ret = -1;
+#endif
+		break;
+	default:
+		ret = -1;
+		break;
+	}
+
+	if (ret == -1) {
+		char buf[FIO_VERROR_SIZE];
+		int e = errno;
+
+		snprintf(buf, sizeof(buf), "stat(%s) type=%u", f->file_name,
+			o->stat_type);
+		td_verror(td, e, buf);
+		return 1;
+	}
+
+	if (do_lat) {
+		struct fc_data *data = td->io_ops_data;
+		uint64_t nsec;
+
+		nsec = ntime_since_now(&start);
+		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+	}
+
+	return 0;
+}
+
+
+static int delete_file(struct thread_data *td, struct fio_file *f)
+{
+	struct timespec start;
+	int do_lat = !td->o.disable_lat;
+	int ret;
+
+	dprint(FD_FILE, "fd delete %s\n", f->file_name);
+
+	if (f->filetype != FIO_TYPE_FILE) {
+		log_err("fio: only files are supported\n");
+		return 1;
+	}
+	if (!strcmp(f->file_name, "-")) {
+		log_err("fio: can't read/write to stdin/out\n");
+		return 1;
+	}
+
+	if (do_lat)
+		fio_gettime(&start, NULL);
+
+	ret = unlink(f->file_name);
+
+	if (ret == -1) {
+		char buf[FIO_VERROR_SIZE];
+		int e = errno;
+
+		snprintf(buf, sizeof(buf), "delete(%s)", f->file_name);
+		td_verror(td, e, buf);
+		return 1;
+	}
+
+	if (do_lat) {
+		struct fc_data *data = td->io_ops_data;
+		uint64_t nsec;
+
+		nsec = ntime_since_now(&start);
+		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+	}
+
+	return 0;
+}
+
+static int invalidate_do_nothing(struct thread_data *td, struct fio_file *f)
+{
+	/* do nothing because file not opened */
+	return 0;
+}
+
+static enum fio_q_status queue_io(struct thread_data *td, struct io_u *io_u)
+{
+	return FIO_Q_COMPLETED;
+}
+
+/*
+ * Ensure that we at least have a block size worth of IO to do for each
+ * file. If the job file has td->o.size < nr_files * block_size, then
+ * fio won't do anything.
+ */
+static int get_file_size(struct thread_data *td, struct fio_file *f)
+{
+	f->real_file_size = td_min_bs(td);
+	return 0;
+}
+
+static int init(struct thread_data *td)
+{
+	struct fc_data *data;
+
+	data = calloc(1, sizeof(*data));
+
+	if (td_read(td))
+		data->stat_ddir = DDIR_READ;
+	else if (td_write(td))
+		data->stat_ddir = DDIR_WRITE;
+
+	td->io_ops_data = data;
+	return 0;
+}
+
+static void cleanup(struct thread_data *td)
+{
+	struct fc_data *data = td->io_ops_data;
+
+	free(data);
+}
+
+static struct ioengine_ops ioengine_filecreate = {
+	.name		= "filecreate",
+	.version	= FIO_IOOPS_VERSION,
+	.init		= init,
+	.cleanup	= cleanup,
+	.queue		= queue_io,
+	.get_file_size	= get_file_size,
+	.open_file	= open_file,
+	.close_file	= generic_close_file,
+	.flags		= FIO_DISKLESSIO | FIO_SYNCIO | FIO_FAKEIO |
+				FIO_NOSTATS | FIO_NOFILEHASH,
+};
+
+static struct ioengine_ops ioengine_filestat = {
+	.name		= "filestat",
+	.version	= FIO_IOOPS_VERSION,
+	.init		= init,
+	.cleanup	= cleanup,
+	.queue		= queue_io,
+	.invalidate	= invalidate_do_nothing,
+	.get_file_size	= generic_get_file_size,
+	.open_file	= stat_file,
+	.flags		=  FIO_SYNCIO | FIO_FAKEIO |
+				FIO_NOSTATS | FIO_NOFILEHASH,
+	.options	= options,
+	.option_struct_size = sizeof(struct filestat_options),
+};
+
+static struct ioengine_ops ioengine_filedelete = {
+	.name		= "filedelete",
+	.version	= FIO_IOOPS_VERSION,
+	.init		= init,
+	.invalidate	= invalidate_do_nothing,
+	.cleanup	= cleanup,
+	.queue		= queue_io,
+	.get_file_size	= generic_get_file_size,
+	.open_file	= delete_file,
+	.flags		=  FIO_SYNCIO | FIO_FAKEIO |
+				FIO_NOSTATS | FIO_NOFILEHASH,
+};
+
+
+static void fio_init fio_fileoperations_register(void)
+{
+	register_ioengine(&ioengine_filecreate);
+	register_ioengine(&ioengine_filestat);
+	register_ioengine(&ioengine_filedelete);
+}
+
+static void fio_exit fio_fileoperations_unregister(void)
+{
+	unregister_ioengine(&ioengine_filecreate);
+	unregister_ioengine(&ioengine_filestat);
+	unregister_ioengine(&ioengine_filedelete);
+}
diff --git a/engines/filestat.c b/engines/filestat.c
deleted file mode 100644
index e587eb54..00000000
--- a/engines/filestat.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * filestat engine
- *
- * IO engine that doesn't do any IO, just stat files and tracks the latency
- * of the file stat.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include "../fio.h"
-#include "../optgroup.h"
-#include "../oslib/statx.h"
-
-struct fc_data {
-	enum fio_ddir stat_ddir;
-};
-
-struct filestat_options {
-	void *pad;
-	unsigned int stat_type;
-};
-
-enum {
-	FIO_FILESTAT_STAT	= 1,
-	FIO_FILESTAT_LSTAT	= 2,
-	FIO_FILESTAT_STATX	= 3,
-};
-
-static struct fio_option options[] = {
-	{
-		.name	= "stat_type",
-		.lname	= "stat_type",
-		.type	= FIO_OPT_STR,
-		.off1	= offsetof(struct filestat_options, stat_type),
-		.help	= "Specify stat system call type to measure lookup/getattr performance",
-		.def	= "stat",
-		.posval = {
-			  { .ival = "stat",
-			    .oval = FIO_FILESTAT_STAT,
-			    .help = "Use stat(2)",
-			  },
-			  { .ival = "lstat",
-			    .oval = FIO_FILESTAT_LSTAT,
-			    .help = "Use lstat(2)",
-			  },
-			  { .ival = "statx",
-			    .oval = FIO_FILESTAT_STATX,
-			    .help = "Use statx(2) if exists",
-			  },
-		},
-		.category = FIO_OPT_C_ENGINE,
-		.group	= FIO_OPT_G_FILESTAT,
-	},
-	{
-		.name	= NULL,
-	},
-};
-
-static int stat_file(struct thread_data *td, struct fio_file *f)
-{
-	struct filestat_options *o = td->eo;
-	struct timespec start;
-	int do_lat = !td->o.disable_lat;
-	struct stat statbuf;
-#ifndef WIN32
-	struct statx statxbuf;
-	char *abspath;
-#endif
-	int ret;
-
-	dprint(FD_FILE, "fd stat %s\n", f->file_name);
-
-	if (f->filetype != FIO_TYPE_FILE) {
-		log_err("fio: only files are supported\n");
-		return 1;
-	}
-	if (!strcmp(f->file_name, "-")) {
-		log_err("fio: can't read/write to stdin/out\n");
-		return 1;
-	}
-
-	if (do_lat)
-		fio_gettime(&start, NULL);
-
-	switch (o->stat_type){
-	case FIO_FILESTAT_STAT:
-		ret = stat(f->file_name, &statbuf);
-		break;
-	case FIO_FILESTAT_LSTAT:
-		ret = lstat(f->file_name, &statbuf);
-		break;
-	case FIO_FILESTAT_STATX:
-#ifndef WIN32
-		abspath = realpath(f->file_name, NULL);
-		if (abspath) {
-			ret = statx(-1, abspath, 0, STATX_ALL, &statxbuf);
-			free(abspath);
-		} else
-			ret = -1;
-#else
-		ret = -1;
-#endif
-		break;
-	default:
-		ret = -1;
-		break;
-	}
-
-	if (ret == -1) {
-		char buf[FIO_VERROR_SIZE];
-		int e = errno;
-
-		snprintf(buf, sizeof(buf), "stat(%s) type=%u", f->file_name,
-			o->stat_type);
-		td_verror(td, e, buf);
-		return 1;
-	}
-
-	if (do_lat) {
-		struct fc_data *data = td->io_ops_data;
-		uint64_t nsec;
-
-		nsec = ntime_since_now(&start);
-		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
-	}
-
-	return 0;
-}
-
-static enum fio_q_status queue_io(struct thread_data *td, struct io_u fio_unused *io_u)
-{
-	return FIO_Q_COMPLETED;
-}
-
-static int init(struct thread_data *td)
-{
-	struct fc_data *data;
-
-	data = calloc(1, sizeof(*data));
-
-	if (td_read(td))
-		data->stat_ddir = DDIR_READ;
-	else if (td_write(td))
-		data->stat_ddir = DDIR_WRITE;
-
-	td->io_ops_data = data;
-	return 0;
-}
-
-static void cleanup(struct thread_data *td)
-{
-	struct fc_data *data = td->io_ops_data;
-
-	free(data);
-}
-
-static int stat_invalidate(struct thread_data *td, struct fio_file *f)
-{
-	/* do nothing because file not opened */
-	return 0;
-}
-
-static struct ioengine_ops ioengine = {
-	.name		= "filestat",
-	.version	= FIO_IOOPS_VERSION,
-	.init		= init,
-	.cleanup	= cleanup,
-	.queue		= queue_io,
-	.invalidate	= stat_invalidate,
-	.get_file_size	= generic_get_file_size,
-	.open_file	= stat_file,
-	.flags		=  FIO_SYNCIO | FIO_FAKEIO |
-				FIO_NOSTATS | FIO_NOFILEHASH,
-	.options	= options,
-	.option_struct_size = sizeof(struct filestat_options),
-};
-
-static void fio_init fio_filestat_register(void)
-{
-	register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_filestat_unregister(void)
-{
-	unregister_ioengine(&ioengine);
-}
diff --git a/engines/http.c b/engines/http.c
index 1de9e66c..56dc7d1b 100644
--- a/engines/http.c
+++ b/engines/http.c
@@ -57,6 +57,9 @@ struct http_options {
 	char *s3_key;
 	char *s3_keyid;
 	char *s3_region;
+	char *s3_sse_customer_key;
+	char *s3_sse_customer_algorithm;
+	char *s3_storage_class;
 	char *swift_auth_token;
 	int verbose;
 	unsigned int mode;
@@ -161,6 +164,36 @@ static struct fio_option options[] = {
 		.category = FIO_OPT_C_ENGINE,
 		.group    = FIO_OPT_G_HTTP,
 	},
+	{
+		.name     = "http_s3_sse_customer_key",
+		.lname    = "SSE Customer Key",
+		.type     = FIO_OPT_STR_STORE,
+		.help     = "S3 SSE Customer Key",
+		.off1     = offsetof(struct http_options, s3_sse_customer_key),
+		.def	  = "",
+		.category = FIO_OPT_C_ENGINE,
+		.group    = FIO_OPT_G_HTTP,
+	},
+	{
+		.name     = "http_s3_sse_customer_algorithm",
+		.lname    = "SSE Customer Algorithm",
+		.type     = FIO_OPT_STR_STORE,
+		.help     = "S3 SSE Customer Algorithm",
+		.off1     = offsetof(struct http_options, s3_sse_customer_algorithm),
+		.def	  = "AES256",
+		.category = FIO_OPT_C_ENGINE,
+		.group    = FIO_OPT_G_HTTP,
+	},
+	{
+		.name     = "http_s3_storage_class",
+		.lname    = "S3 Storage class",
+		.type     = FIO_OPT_STR_STORE,
+		.help     = "S3 Storage Class",
+		.off1     = offsetof(struct http_options, s3_storage_class),
+		.def	  = "STANDARD",
+		.category = FIO_OPT_C_ENGINE,
+		.group    = FIO_OPT_G_HTTP,
+	},
 	{
 		.name     = "http_mode",
 		.lname    = "Request mode to use",
@@ -266,6 +299,54 @@ static char *_gen_hex_md5(const char *p, size_t len)
 	return _conv_hex(hash, MD5_DIGEST_LENGTH);
 }
 
+static char *_conv_base64_encode(const unsigned char *p, size_t len)
+{
+	char *r, *ret;
+	int i;
+	static const char sEncodingTable[] = {
+		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+		'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+		'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+		'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
+		'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+		'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+		'w', 'x', 'y', 'z', '0', '1', '2', '3',
+		'4', '5', '6', '7', '8', '9', '+', '/'
+	};
+
+	size_t out_len = 4 * ((len + 2) / 3);
+	ret = r = malloc(out_len + 1);
+
+	for (i = 0; i < len - 2; i += 3) {
+		*r++ = sEncodingTable[(p[i] >> 2) & 0x3F];
+		*r++ = sEncodingTable[((p[i] & 0x3) << 4) | ((int) (p[i + 1] & 0xF0) >> 4)];
+		*r++ = sEncodingTable[((p[i + 1] & 0xF) << 2) | ((int) (p[i + 2] & 0xC0) >> 6)];
+		*r++ = sEncodingTable[p[i + 2] & 0x3F];
+	}
+
+	if (i < len) {
+		*r++ = sEncodingTable[(p[i] >> 2) & 0x3F];
+		if (i == (len - 1)) {
+			*r++ = sEncodingTable[((p[i] & 0x3) << 4)];
+			*r++ = '=';
+		} else {
+			*r++ = sEncodingTable[((p[i] & 0x3) << 4) | ((int) (p[i + 1] & 0xF0) >> 4)];
+			*r++ = sEncodingTable[((p[i + 1] & 0xF) << 2)];
+		}
+		*r++ = '=';
+	}
+
+	ret[out_len]=0;
+	return ret;
+}
+
+static char *_gen_base64_md5(const unsigned char *p, size_t len)
+{
+	unsigned char hash[MD5_DIGEST_LENGTH];
+	MD5((unsigned char*)p, len, hash);
+	return _conv_base64_encode(hash, MD5_DIGEST_LENGTH);
+}
+
 static void _hmac(unsigned char *md, void *key, int key_len, char *data) {
 #ifndef CONFIG_HAVE_OPAQUE_HMAC_CTX
 	HMAC_CTX _ctx;
@@ -335,8 +416,8 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	char date_iso[32];
 	char method[8];
 	char dkey[128];
-	char creq[512];
-	char sts[256];
+	char creq[4096];
+	char sts[512];
 	char s[512];
 	char *uri_encoded = NULL;
 	char *dsha = NULL;
@@ -345,6 +426,9 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	const char *service = "s3";
 	const char *aws = "aws4_request";
 	unsigned char md[SHA256_DIGEST_LENGTH];
+	unsigned char sse_key[33] = {0};
+	char *sse_key_base64 = NULL;
+	char *sse_key_md5_base64 = NULL;
 
 	time_t t = time(NULL);
 	struct tm *gtm = gmtime(&t);
@@ -353,6 +437,9 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	strftime (date_iso, sizeof(date_iso), "%Y%m%dT%H%M%SZ", gtm);
 	uri_encoded = _aws_uriencode(uri);
 
+	if (o->s3_sse_customer_key != NULL)
+		strncpy((char*)sse_key, o->s3_sse_customer_key, sizeof(sse_key) - 1);
+
 	if (op == DDIR_WRITE) {
 		dsha = _gen_hex_sha256(buf, len);
 		sprintf(method, "PUT");
@@ -366,22 +453,50 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	}
 
 	/* Create the canonical request first */
-	snprintf(creq, sizeof(creq),
-	"%s\n"
-	"%s\n"
-	"\n"
-	"host:%s\n"
-	"x-amz-content-sha256:%s\n"
-	"x-amz-date:%s\n"
-	"\n"
-	"host;x-amz-content-sha256;x-amz-date\n"
-	"%s"
-	, method
-	, uri_encoded, o->host, dsha, date_iso, dsha);
+	if (sse_key[0] != '\0') {
+		sse_key_base64 = _conv_base64_encode(sse_key, sizeof(sse_key) - 1);
+		sse_key_md5_base64 = _gen_base64_md5(sse_key, sizeof(sse_key) - 1);
+		snprintf(creq, sizeof(creq),
+			"%s\n"
+			"%s\n"
+			"\n"
+			"host:%s\n"
+			"x-amz-content-sha256:%s\n"
+			"x-amz-date:%s\n"
+			"x-amz-server-side-encryption-customer-algorithm:%s\n"
+			"x-amz-server-side-encryption-customer-key:%s\n"
+			"x-amz-server-side-encryption-customer-key-md5:%s\n"
+			"x-amz-storage-class:%s\n"
+			"\n"
+			"host;x-amz-content-sha256;x-amz-date;"
+			"x-amz-server-side-encryption-customer-algorithm;"
+			"x-amz-server-side-encryption-customer-key;"
+			"x-amz-server-side-encryption-customer-key-md5;"
+			"x-amz-storage-class\n"
+			"%s"
+			, method
+			, uri_encoded, o->host, dsha, date_iso
+			, o->s3_sse_customer_algorithm, sse_key_base64
+			, sse_key_md5_base64, o->s3_storage_class, dsha);
+	} else {
+		snprintf(creq, sizeof(creq),
+			"%s\n"
+			"%s\n"
+			"\n"
+			"host:%s\n"
+			"x-amz-content-sha256:%s\n"
+			"x-amz-date:%s\n"
+			"x-amz-storage-class:%s\n"
+			"\n"
+			"host;x-amz-content-sha256;x-amz-date;x-amz-storage-class\n"
+			"%s"
+			, method
+			, uri_encoded, o->host, dsha, date_iso, o->s3_storage_class, dsha);
+	}
 
 	csha = _gen_hex_sha256(creq, strlen(creq));
 	snprintf(sts, sizeof(sts), "AWS4-HMAC-SHA256\n%s\n%s/%s/%s/%s\n%s",
-		date_iso, date_short, o->s3_region, service, aws, csha);
+			date_iso, date_short, o->s3_region, service, aws, csha);
 
 	snprintf((char *)dkey, sizeof(dkey), "AWS4%s", o->s3_key);
 	_hmac(md, dkey, strlen(dkey), date_short);
@@ -401,9 +516,32 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	snprintf(s, sizeof(s), "x-amz-date: %s", date_iso);
 	slist = curl_slist_append(slist, s);
 
-	snprintf(s, sizeof(s), "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,"
-	"SignedHeaders=host;x-amz-content-sha256;x-amz-date,Signature=%s",
-	o->s3_keyid, date_short, o->s3_region, signature);
+	if (sse_key[0] != '\0') {
+		snprintf(s, sizeof(s), "x-amz-server-side-encryption-customer-algorithm: %s", o->s3_sse_customer_algorithm);
+		slist = curl_slist_append(slist, s);
+		snprintf(s, sizeof(s), "x-amz-server-side-encryption-customer-key: %s", sse_key_base64);
+		slist = curl_slist_append(slist, s);
+		snprintf(s, sizeof(s), "x-amz-server-side-encryption-customer-key-md5: %s", sse_key_md5_base64);
+		slist = curl_slist_append(slist, s);
+	}
+
+	snprintf(s, sizeof(s), "x-amz-storage-class: %s", o->s3_storage_class);
+	slist = curl_slist_append(slist, s);
+
+	if (sse_key[0] != '\0') {
+		snprintf(s, sizeof(s), "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,"
+			"SignedHeaders=host;x-amz-content-sha256;"
+			"x-amz-date;x-amz-server-side-encryption-customer-algorithm;"
+			"x-amz-server-side-encryption-customer-key;"
+			"x-amz-server-side-encryption-customer-key-md5;"
+			"x-amz-storage-class,"
+			"Signature=%s",
+		o->s3_keyid, date_short, o->s3_region, signature);
+	} else {
+		snprintf(s, sizeof(s), "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,"
+			"SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-storage-class,Signature=%s",
+			o->s3_keyid, date_short, o->s3_region, signature);
+	}
 	slist = curl_slist_append(slist, s);
 
 	curl_easy_setopt(curl, CURLOPT_HTTPHEADER, slist);
@@ -412,6 +550,10 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 	free(csha);
 	free(dsha);
 	free(signature);
+	if (sse_key_base64 != NULL) {
+		free(sse_key_base64);
+		free(sse_key_md5_base64);
+	}
 }
 
 static void _add_swift_header(CURL *curl, struct curl_slist *slist, struct http_options *o,
diff --git a/examples/http-s3-crypto.fio b/examples/http-s3-crypto.fio
new file mode 100644
index 00000000..2403746e
--- /dev/null
+++ b/examples/http-s3-crypto.fio
@@ -0,0 +1,38 @@
+# Example test for the HTTP engine's S3 support against Amazon AWS.
+# Obviously, you have to adjust the S3 credentials; for this example,
+# they're passed in via the environment.
+# You can also set the SSE customer key and algorithm to test
+# server-side encryption.
+#
+
+[global]
+ioengine=http
+name=test
+direct=1
+filename=/larsmb-fio-test/object
+http_verbose=0
+https=on
+http_mode=s3
+http_s3_key=${S3_KEY}
+http_s3_keyid=${S3_ID}
+http_host=s3.eu-central-1.amazonaws.com
+http_s3_region=eu-central-1
+http_s3_sse_customer_key=${SSE_KEY}
+http_s3_sse_customer_algorithm=AES256
+group_reporting
+
+# With verify, this both writes and reads the object
+[create]
+rw=write
+bs=4k
+size=64k
+io_size=4k
+verify=sha256
+
+[trim]
+stonewall
+rw=trim
+bs=4k
+size=64k
+io_size=4k
+
diff --git a/examples/http-s3-storage-class.fio b/examples/http-s3-storage-class.fio
new file mode 100644
index 00000000..9ee23837
--- /dev/null
+++ b/examples/http-s3-storage-class.fio
@@ -0,0 +1,37 @@
+# Example test for the HTTP engine's S3 support against Amazon AWS.
+# Obviously, you have to adjust the S3 credentials; for this example,
+# they're passed in via the environment.
+# This example also sets the storage class parameter: use STANDARD for a
+# normal test, or another storage class for e.g. a compression test.
+#
+
+[global]
+ioengine=http
+name=test
+direct=1
+filename=/larsmb-fio-test/object
+http_verbose=0
+https=on
+http_mode=s3
+http_s3_key=${S3_KEY}
+http_s3_keyid=${S3_ID}
+http_host=s3.eu-central-1.amazonaws.com
+http_s3_region=eu-central-1
+http_s3_storage_class=${STORAGE_CLASS}
+group_reporting
+
+# With verify, this both writes and reads the object
+[create]
+rw=write
+bs=4k
+size=64k
+io_size=4k
+verify=sha256
+
+[trim]
+stonewall
+rw=trim
+bs=4k
+size=64k
+io_size=4k
+
diff --git a/fio.1 b/fio.1
index ce9bf3ef..6630525f 100644
--- a/fio.1
+++ b/fio.1
@@ -2308,6 +2308,15 @@ The S3 secret key.
 .BI (http)http_s3_keyid \fR=\fPstr
 The S3 key/access id.
 .TP
+.BI (http)http_s3_sse_customer_key \fR=\fPstr
+The customer-provided encryption key to use for S3 server-side encryption (SSE).
+.TP
+.BI (http)http_s3_sse_customer_algorithm \fR=\fPstr
+The encryption algorithm to use with the customer-provided key for S3 server-side encryption. Default is \fBAES256\fR.
+.TP
+.BI (http)http_s3_storage_class \fR=\fPstr
+The S3 storage class to request, sent as the x-amz-storage-class header. This is a free-form string. Default is \fBSTANDARD\fR.
+.TP
 .BI (http)http_swift_auth_token \fR=\fPstr
 The Swift auth token. See the example configuration file on how to
 retrieve this.

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-11 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-11 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6cafe8445fd1e04e5f7d67bbc73029a538d1b253:

  Fio 3.31 (2022-08-09 14:41:25 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 9dc528b1638b625b5e167983a74de4e85c5859ea:

  lib/rand: get rid of unused MAX_SEED_BUCKETS (2022-08-10 09:51:49 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      Merge branch 'multi_seed_refill' of https://github.com/sungup/fio
      lib/rand: get rid of unused MAX_SEED_BUCKETS

Sungup Moon (1):
      lib/rand: Enhance __fill_random_buf using the multi random seed

 configure  | 17 +++++++++++++++++
 lib/rand.c | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 36450df8..a2b9bd4c 100755
--- a/configure
+++ b/configure
@@ -116,6 +116,10 @@ has() {
   type "$1" >/dev/null 2>&1
 }
 
+num() {
+  echo "$1" | grep -P -q "^[0-9]+$"
+}
+
 check_define() {
   cat > $TMPC <<EOF
 #if !defined($1)
@@ -174,6 +178,7 @@ libnfs=""
 xnvme=""
 libzbc=""
 dfs=""
+seed_buckets=""
 dynamic_engines="no"
 prefix=/usr/local
 
@@ -255,6 +260,8 @@ for opt do
   ;;
   --enable-asan) asan="yes"
   ;;
+  --seed-buckets=*) seed_buckets="$optarg"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -302,6 +309,7 @@ if test "$show_help" = "yes" ; then
   echo "--dynamic-libengines    Lib-based ioengines as dynamic libraries"
   echo "--disable-dfs           Disable DAOS File System support even if found"
   echo "--enable-asan           Enable address sanitizer"
+  echo "--seed-buckets=         Number of seed buckets for the refill-buffer"
   exit $exit_val
 fi
 
@@ -3273,6 +3281,15 @@ if test "$disable_tcmalloc" != "yes"; then
   fi
 fi
 print_config "TCMalloc support" "$tcmalloc"
+if ! num "$seed_buckets"; then
+  seed_buckets=4
+elif test "$seed_buckets" -lt 2; then
+  seed_buckets=2
+elif test "$seed_buckets" -gt 16; then
+  seed_buckets=16
+fi
+echo "#define CONFIG_SEED_BUCKETS $seed_buckets" >> $config_host_h
+print_config "seed_buckets" "$seed_buckets"
 
 echo "LIBS+=$LIBS" >> $config_host_mak
 echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
diff --git a/lib/rand.c b/lib/rand.c
index 1e669116..0e787a62 100644
--- a/lib/rand.c
+++ b/lib/rand.c
@@ -95,7 +95,7 @@ void init_rand_seed(struct frand_state *state, uint64_t seed, bool use64)
 		__init_rand64(&state->state64, seed);
 }
 
-void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
+void __fill_random_buf_small(void *buf, unsigned int len, uint64_t seed)
 {
 	uint64_t *b = buf;
 	uint64_t *e = b  + len / sizeof(*b);
@@ -110,6 +110,37 @@ void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
 		__builtin_memcpy(e, &seed, rest);
 }
 
+void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
+{
+	static uint64_t prime[] = {1, 2, 3, 5, 7, 11, 13, 17,
+				   19, 23, 29, 31, 37, 41, 43, 47};
+	uint64_t *b, *e, s[CONFIG_SEED_BUCKETS];
+	unsigned int rest;
+	int p;
+
+	/*
+	 * Calculate the max index which is multiples of the seed buckets.
+	 */
+	rest = (len / sizeof(*b) / CONFIG_SEED_BUCKETS) * CONFIG_SEED_BUCKETS;
+
+	b = buf;
+	e = b + rest;
+
+	rest = len - (rest * sizeof(*b));
+
+	for (p = 0; p < CONFIG_SEED_BUCKETS; p++)
+		s[p] = seed * prime[p];
+
+	for (; b != e; b += CONFIG_SEED_BUCKETS) {
+		for (p = 0; p < CONFIG_SEED_BUCKETS; ++p) {
+			b[p] = s[p];
+			s[p] = __hash_u64(s[p]);
+		}
+	}
+
+	__fill_random_buf_small(b, rest, s[0]);
+}
+
 uint64_t fill_random_buf(struct frand_state *fs, void *buf,
 			 unsigned int len)
 {

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-10 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-10 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit de31fe9ab3dd6115cd0d5c77354f67f06595570d:

  testing: add test for slat + clat = tlat (2022-08-07 12:27:55 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6cafe8445fd1e04e5f7d67bbc73029a538d1b253:

  Fio 3.31 (2022-08-09 14:41:25 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      Merge branch 'master' of ssh://git.kernel.dk/data/git/fio
      Fio 3.31

Vincent Fu (2):
      ci: upload tagged AppVeyor installers as GitHub releases
      ci: drop master branch requirement for AppVeyor releases

 .appveyor.yml   | 12 ++++++++++++
 FIO-VERSION-GEN |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/.appveyor.yml b/.appveyor.yml
index b94eefe3..92301ca9 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -50,5 +50,17 @@ after_build:
 test_script:
   - python.exe t/run-fio-tests.py --artifact-root test-artifacts --debug
 
+deploy:
+  - provider: GitHub
+    description: fio Windows installer
+    auth_token:                      # encrypted token from GitHub
+      secure: Tjj+xRQEV25P6dQgboUblTCKx/LtUOUav2bvzSCtwMhHMAxrrn2adod6nlTf0ItV
+    artifact: fio.msi                # upload installer to release assets
+    draft: false
+    prerelease: false
+    on:
+      APPVEYOR_REPO_TAG: true        # deploy on tag push only
+      DISTRO: cygwin
+
 on_finish:
   - 'bash.exe -lc "cd \"${APPVEYOR_BUILD_FOLDER}\" && [ -d test-artifacts ] && 7z a -t7z test-artifacts.7z test-artifacts -xr!foo.0.0 -xr!latency.?.0 -xr!fio_jsonplus_clat2csv.test && appveyor PushArtifact test-artifacts.7z'
diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index fa64f50f..72630dd0 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.30
+DEF_VER=fio-3.31
 
 LF='
 '

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-08 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-08 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c08f9533042e909d4b4b12fdb8d14f1bc8e23dff:

  filesetup: use correct random seed for non-uniform distributions (2022-08-03 16:18:53 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to de31fe9ab3dd6115cd0d5c77354f67f06595570d:

  testing: add test for slat + clat = tlat (2022-08-07 12:27:55 -0400)

----------------------------------------------------------------
Vincent Fu (3):
      testing: add test for slat + clat = tlat
      engines/null: add FIO_ASYNCIO_SETS_ISSUE_TIME flag
      testing: add test for slat + clat = tlat

 engines/null.c            |  2 ++
 t/jobs/t0015-e78980ff.fio |  7 +++++++
 t/jobs/t0016-259ebc00.fio |  7 +++++++
 t/run-fio-tests.py        | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+)
 create mode 100644 t/jobs/t0015-e78980ff.fio
 create mode 100644 t/jobs/t0016-259ebc00.fio

---

Diff of recent changes:

diff --git a/engines/null.c b/engines/null.c
index 2df56718..68759c26 100644
--- a/engines/null.c
+++ b/engines/null.c
@@ -113,9 +113,11 @@ static struct null_data *null_init(struct thread_data *td)
 	if (td->o.iodepth != 1) {
 		nd->io_us = (struct io_u **) malloc(td->o.iodepth * sizeof(struct io_u *));
 		memset(nd->io_us, 0, td->o.iodepth * sizeof(struct io_u *));
+		td->io_ops->flags |= FIO_ASYNCIO_SETS_ISSUE_TIME;
 	} else
 		td->io_ops->flags |= FIO_SYNCIO;
 
+	td_set_ioengine_flags(td);
 	return nd;
 }
 
diff --git a/t/jobs/t0015-e78980ff.fio b/t/jobs/t0015-e78980ff.fio
new file mode 100644
index 00000000..c650c0b2
--- /dev/null
+++ b/t/jobs/t0015-e78980ff.fio
@@ -0,0 +1,7 @@
+# Expected result: mean(slat) + mean(clat) = mean(lat)
+# Buggy result: equality does not hold
+
+[test]
+ioengine=libaio
+size=1M
+iodepth=16
diff --git a/t/jobs/t0016-259ebc00.fio b/t/jobs/t0016-259ebc00.fio
new file mode 100644
index 00000000..1b418e7c
--- /dev/null
+++ b/t/jobs/t0016-259ebc00.fio
@@ -0,0 +1,7 @@
+# Expected result: mean(slat) + mean(clat) = mean(lat)
+# Buggy result: equality does not hold
+
+[test]
+ioengine=null
+size=1M
+iodepth=16
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 32cdbc19..d77f20e0 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -527,6 +527,27 @@ class FioJobTest_t0014(FioJobTest):
             return
 
 
+class FioJobTest_t0015(FioJobTest):
+    """Test consists of fio test jobs t0015 and t0016
+    Confirm that mean(slat) + mean(clat) = mean(tlat)"""
+
+    def check_result(self):
+        super(FioJobTest_t0015, self).check_result()
+
+        if not self.passed:
+            return
+
+        slat = self.json_data['jobs'][0]['read']['slat_ns']['mean']
+        clat = self.json_data['jobs'][0]['read']['clat_ns']['mean']
+        tlat = self.json_data['jobs'][0]['read']['lat_ns']['mean']
+        logging.debug('Test %d: slat %f, clat %f, tlat %f', self.testnum, slat, clat, tlat)
+
+        if abs(slat + clat - tlat) > 1:
+            self.failure_reason = "{0} slat {1} + clat {2} = {3} != tlat {4},".format(
+                self.failure_reason, slat, clat, slat+clat, tlat)
+            self.passed = False
+
+
 class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0009
     Confirm that job0 iops == 1000
@@ -816,6 +837,26 @@ TEST_LIST = [
         'output_format':    'json',
         'requirements':     [],
     },
+    {
+        'test_id':          15,
+        'test_class':       FioJobTest_t0015,
+        'job':              't0015-e78980ff.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [Requirements.linux, Requirements.libaio],
+    },
+    {
+        'test_id':          16,
+        'test_class':       FioJobTest_t0015,
+        'job':              't0016-259ebc00.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-04 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-04 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 7006d70c7c8b9a39cf3dfdd839d1975295c10527:

  Merge branch 'io_uring-numa' (2022-08-02 10:20:31 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c08f9533042e909d4b4b12fdb8d14f1bc8e23dff:

  filesetup: use correct random seed for non-uniform distributions (2022-08-03 16:18:53 -0400)

----------------------------------------------------------------
Vincent Fu (3):
      examples: fix ioengine in zbd-rand-write.fio
      engines/null: fill issue_time during commit
      filesetup: use correct random seed for non-uniform distributions

 engines/null.c              | 19 +++++++++++++++++++
 examples/zbd-rand-write.fio |  2 +-
 filesetup.c                 |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/engines/null.c b/engines/null.c
index 8dcd1b21..2df56718 100644
--- a/engines/null.c
+++ b/engines/null.c
@@ -44,9 +44,28 @@ static int null_getevents(struct null_data *nd, unsigned int min_events,
 	return ret;
 }
 
+static void null_queued(struct thread_data *td, struct null_data *nd)
+{
+	struct timespec now;
+
+	if (!fio_fill_issue_time(td))
+		return;
+
+	fio_gettime(&now, NULL);
+
+	for (int i = 0; i < nd->queued; i++) {
+		struct io_u *io_u = nd->io_us[i];
+
+		memcpy(&io_u->issue_time, &now, sizeof(now));
+		io_u_queued(td, io_u);
+	}
+}
+
 static int null_commit(struct thread_data *td, struct null_data *nd)
 {
 	if (!nd->events) {
+		null_queued(td, nd);
+
 #ifndef FIO_EXTERNAL_ENGINE
 		io_u_mark_submit(td, nd->queued);
 #endif
diff --git a/examples/zbd-rand-write.fio b/examples/zbd-rand-write.fio
index 46cddd06..9494a583 100644
--- a/examples/zbd-rand-write.fio
+++ b/examples/zbd-rand-write.fio
@@ -1,4 +1,4 @@
-; Using the libaio ioengine, random write to a (zoned) block device,
+; Using the psync ioengine, random write to a (zoned) block device,
 ; writing at most 32 zones at a time. Target zones are chosen randomly
 ; and writes directed at the write pointer of the chosen zones
 
diff --git a/filesetup.c b/filesetup.c
index e0592209..3e2ccf9b 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -1495,7 +1495,7 @@ static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
 
 	seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
 	if (!td->o.rand_repeatable)
-		seed = td->rand_seeds[4];
+		seed = td->rand_seeds[FIO_RAND_BLOCK_OFF];
 
 	if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
 		zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, td->o.random_center.u.f, seed);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-03 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-03 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 55037c4839c65612fa388ae937e63661d8192ed9:

  t/io_uring: switch to GiB/sec if numbers get large (2022-07-31 12:06:12 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 7006d70c7c8b9a39cf3dfdd839d1975295c10527:

  Merge branch 'io_uring-numa' (2022-08-02 10:20:31 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      t/io_uring: support NUMA placement
      Merge branch 'io_uring-numa'

 t/io_uring.c | 446 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 252 insertions(+), 194 deletions(-)

---

Diff of recent changes:

diff --git a/t/io_uring.c b/t/io_uring.c
index 335a06ed..35bf1956 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -11,6 +11,10 @@
 #include <libaio.h>
 #endif
 
+#ifdef CONFIG_LIBNUMA
+#include <numa.h>
+#endif
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
@@ -100,6 +104,9 @@ struct submitter {
 	io_context_t aio_ctx;
 #endif
 
+	int numa_node;
+	const char *filename;
+
 	struct file files[MAX_FDS];
 	unsigned nr_files;
 	unsigned cur_file;
@@ -110,6 +117,7 @@ static struct submitter *submitter;
 static volatile int finish;
 static int stats_running;
 static unsigned long max_iops;
+static long page_size;
 
 static int depth = DEPTH;
 static int batch_submit = BATCH_SUBMIT;
@@ -130,6 +138,7 @@ static int runtime = 0;		/* runtime */
 static int random_io = 1;	/* random or sequential IO */
 static int register_ring = 1;	/* register ring */
 static int use_sync = 0;	/* use preadv2 */
+static int numa_placement = 0;	/* set to node of device */
 
 static unsigned long tsc_rate;
 
@@ -611,12 +620,191 @@ static int reap_events_uring(struct submitter *s)
 	return reaped;
 }
 
+static void set_affinity(struct submitter *s)
+{
+#ifdef CONFIG_LIBNUMA
+	struct bitmask *mask;
+
+	if (s->numa_node == -1)
+		return;
+
+	numa_set_preferred(s->numa_node);
+
+	mask = numa_allocate_cpumask();
+	numa_node_to_cpus(s->numa_node, mask);
+	numa_sched_setaffinity(s->tid, mask);
+#endif
+}
+
+static int detect_node(struct submitter *s, const char *name)
+{
+#ifdef CONFIG_LIBNUMA
+	const char *base = basename(name);
+	char str[128];
+	int ret, fd, node;
+
+	sprintf(str, "/sys/block/%s/device/numa_node", base);
+	fd = open(str, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	ret = read(fd, str, sizeof(str));
+	if (ret < 0) {
+		close(fd);
+		return -1;
+	}
+	node = atoi(str);
+	s->numa_node = node;
+	close(fd);
+#else
+	s->numa_node = -1;
+#endif
+	return 0;
+}
+
+static int setup_aio(struct submitter *s)
+{
+#ifdef CONFIG_LIBAIO
+	if (polled) {
+		fprintf(stderr, "aio does not support polled IO\n");
+		polled = 0;
+	}
+	if (sq_thread_poll) {
+		fprintf(stderr, "aio does not support SQPOLL IO\n");
+		sq_thread_poll = 0;
+	}
+	if (do_nop) {
+		fprintf(stderr, "aio does not support polled IO\n");
+		do_nop = 0;
+	}
+	if (fixedbufs || register_files) {
+		fprintf(stderr, "aio does not support registered files or buffers\n");
+		fixedbufs = register_files = 0;
+	}
+
+	return io_queue_init(roundup_pow2(depth), &s->aio_ctx);
+#else
+	fprintf(stderr, "Legacy AIO not available on this system/build\n");
+	errno = EINVAL;
+	return -1;
+#endif
+}
+
+static int setup_ring(struct submitter *s)
+{
+	struct io_sq_ring *sring = &s->sq_ring;
+	struct io_cq_ring *cring = &s->cq_ring;
+	struct io_uring_params p;
+	int ret, fd;
+	void *ptr;
+
+	memset(&p, 0, sizeof(p));
+
+	if (polled && !do_nop)
+		p.flags |= IORING_SETUP_IOPOLL;
+	if (sq_thread_poll) {
+		p.flags |= IORING_SETUP_SQPOLL;
+		if (sq_thread_cpu != -1) {
+			p.flags |= IORING_SETUP_SQ_AFF;
+			p.sq_thread_cpu = sq_thread_cpu;
+		}
+	}
+
+	fd = io_uring_setup(depth, &p);
+	if (fd < 0) {
+		perror("io_uring_setup");
+		return 1;
+	}
+	s->ring_fd = s->enter_ring_fd = fd;
+
+	io_uring_probe(fd);
+
+	if (fixedbufs) {
+		struct rlimit rlim;
+
+		rlim.rlim_cur = RLIM_INFINITY;
+		rlim.rlim_max = RLIM_INFINITY;
+		/* ignore potential error, not needed on newer kernels */
+		setrlimit(RLIMIT_MEMLOCK, &rlim);
+
+		ret = io_uring_register_buffers(s);
+		if (ret < 0) {
+			perror("io_uring_register_buffers");
+			return 1;
+		}
+
+		if (dma_map) {
+			ret = io_uring_map_buffers(s);
+			if (ret < 0) {
+				perror("io_uring_map_buffers");
+				return 1;
+			}
+		}
+	}
+
+	if (register_files) {
+		ret = io_uring_register_files(s);
+		if (ret < 0) {
+			perror("io_uring_register_files");
+			return 1;
+		}
+	}
+
+	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+			IORING_OFF_SQ_RING);
+	sring->head = ptr + p.sq_off.head;
+	sring->tail = ptr + p.sq_off.tail;
+	sring->ring_mask = ptr + p.sq_off.ring_mask;
+	sring->ring_entries = ptr + p.sq_off.ring_entries;
+	sring->flags = ptr + p.sq_off.flags;
+	sring->array = ptr + p.sq_off.array;
+	sq_ring_mask = *sring->ring_mask;
+
+	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+			IORING_OFF_SQES);
+
+	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+			IORING_OFF_CQ_RING);
+	cring->head = ptr + p.cq_off.head;
+	cring->tail = ptr + p.cq_off.tail;
+	cring->ring_mask = ptr + p.cq_off.ring_mask;
+	cring->ring_entries = ptr + p.cq_off.ring_entries;
+	cring->cqes = ptr + p.cq_off.cqes;
+	cq_ring_mask = *cring->ring_mask;
+	return 0;
+}
+
+static void *allocate_mem(struct submitter *s, int size)
+{
+	void *buf;
+
+#ifdef CONFIG_LIBNUMA
+	if (s->numa_node != -1)
+		return numa_alloc_onnode(size, s->numa_node);
+#endif
+
+	if (posix_memalign(&buf, page_size, bs)) {
+		printf("failed alloc\n");
+		return NULL;
+	}
+
+	return buf;
+}
+
 static int submitter_init(struct submitter *s)
 {
-	int i, nr_batch;
+	int i, nr_batch, err;
+	static int init_printed;
+	char buf[80];
 
 	s->tid = gettid();
-	printf("submitter=%d, tid=%d\n", s->index, s->tid);
+	printf("submitter=%d, tid=%d, file=%s, node=%d\n", s->index, s->tid,
+							s->filename, s->numa_node);
+
+	set_affinity(s);
 
 	__init_rand64(&s->rand_state, pthread_self());
 	srand48(pthread_self());
@@ -624,6 +812,37 @@ static int submitter_init(struct submitter *s)
 	for (i = 0; i < MAX_FDS; i++)
 		s->files[i].fileno = i;
 
+	for (i = 0; i < roundup_pow2(depth); i++) {
+		void *buf;
+
+		buf = allocate_mem(s, bs);
+		if (!buf)
+			return 1;
+		s->iovecs[i].iov_base = buf;
+		s->iovecs[i].iov_len = bs;
+	}
+
+	if (use_sync) {
+		sprintf(buf, "Engine=preadv2\n");
+		err = 0;
+	} else if (!aio) {
+		err = setup_ring(s);
+		sprintf(buf, "Engine=io_uring, sq_ring=%d, cq_ring=%d\n", *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
+	} else {
+		sprintf(buf, "Engine=aio\n");
+		err = setup_aio(s);
+	}
+	if (err) {
+		printf("queue setup failed: %s, %d\n", strerror(errno), err);
+		return 1;
+	}
+
+	if (!init_printed) {
+		printf("polled=%d, fixedbufs=%d/%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, dma_map, register_files, buffered, depth);
+		printf("%s", buf);
+		init_printed = 1;
+	}
+
 	if (stats) {
 		nr_batch = roundup_pow2(depth / batch_submit);
 		if (nr_batch < 2)
@@ -1026,15 +1245,21 @@ static struct submitter *get_submitter(int offset)
 static void do_finish(const char *reason)
 {
 	int j;
+
 	printf("Exiting on %s\n", reason);
 	for (j = 0; j < nthreads; j++) {
 		struct submitter *s = get_submitter(j);
 		s->finish = 1;
 	}
-	if (max_iops > 100000)
-		printf("Maximum IOPS=%luK\n", max_iops / 1000);
-	else if (max_iops)
+	if (max_iops > 1000000) {
+		double miops = (double) max_iops / 1000000.0;
+		printf("Maximum IOPS=%.2fM\n", miops);
+	} else if (max_iops > 100000) {
+		double kiops = (double) max_iops / 1000.0;
+		printf("Maximum IOPS=%.2fK\n", kiops);
+	} else {
 		printf("Maximum IOPS=%lu\n", max_iops);
+	}
 	finish = 1;
 }
 
@@ -1058,144 +1283,6 @@ static void arm_sig_int(void)
 #endif
 }
 
-static int setup_aio(struct submitter *s)
-{
-#ifdef CONFIG_LIBAIO
-	if (polled) {
-		fprintf(stderr, "aio does not support polled IO\n");
-		polled = 0;
-	}
-	if (sq_thread_poll) {
-		fprintf(stderr, "aio does not support SQPOLL IO\n");
-		sq_thread_poll = 0;
-	}
-	if (do_nop) {
-		fprintf(stderr, "aio does not support polled IO\n");
-		do_nop = 0;
-	}
-	if (fixedbufs || register_files) {
-		fprintf(stderr, "aio does not support registered files or buffers\n");
-		fixedbufs = register_files = 0;
-	}
-
-	return io_queue_init(roundup_pow2(depth), &s->aio_ctx);
-#else
-	fprintf(stderr, "Legacy AIO not available on this system/build\n");
-	errno = EINVAL;
-	return -1;
-#endif
-}
-
-static int setup_ring(struct submitter *s)
-{
-	struct io_sq_ring *sring = &s->sq_ring;
-	struct io_cq_ring *cring = &s->cq_ring;
-	struct io_uring_params p;
-	int ret, fd;
-	void *ptr;
-
-	memset(&p, 0, sizeof(p));
-
-	if (polled && !do_nop)
-		p.flags |= IORING_SETUP_IOPOLL;
-	if (sq_thread_poll) {
-		p.flags |= IORING_SETUP_SQPOLL;
-		if (sq_thread_cpu != -1) {
-			p.flags |= IORING_SETUP_SQ_AFF;
-			p.sq_thread_cpu = sq_thread_cpu;
-		}
-	}
-
-	fd = io_uring_setup(depth, &p);
-	if (fd < 0) {
-		perror("io_uring_setup");
-		return 1;
-	}
-	s->ring_fd = s->enter_ring_fd = fd;
-
-	io_uring_probe(fd);
-
-	if (fixedbufs) {
-		struct rlimit rlim;
-
-		rlim.rlim_cur = RLIM_INFINITY;
-		rlim.rlim_max = RLIM_INFINITY;
-		/* ignore potential error, not needed on newer kernels */
-		setrlimit(RLIMIT_MEMLOCK, &rlim);
-
-		ret = io_uring_register_buffers(s);
-		if (ret < 0) {
-			perror("io_uring_register_buffers");
-			return 1;
-		}
-
-		if (dma_map) {
-			ret = io_uring_map_buffers(s);
-			if (ret < 0) {
-				perror("io_uring_map_buffers");
-				return 1;
-			}
-		}
-	}
-
-	if (register_files) {
-		ret = io_uring_register_files(s);
-		if (ret < 0) {
-			perror("io_uring_register_files");
-			return 1;
-		}
-	}
-
-	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
-			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
-			IORING_OFF_SQ_RING);
-	sring->head = ptr + p.sq_off.head;
-	sring->tail = ptr + p.sq_off.tail;
-	sring->ring_mask = ptr + p.sq_off.ring_mask;
-	sring->ring_entries = ptr + p.sq_off.ring_entries;
-	sring->flags = ptr + p.sq_off.flags;
-	sring->array = ptr + p.sq_off.array;
-	sq_ring_mask = *sring->ring_mask;
-
-	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
-			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
-			IORING_OFF_SQES);
-
-	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
-			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
-			IORING_OFF_CQ_RING);
-	cring->head = ptr + p.cq_off.head;
-	cring->tail = ptr + p.cq_off.tail;
-	cring->ring_mask = ptr + p.cq_off.ring_mask;
-	cring->ring_entries = ptr + p.cq_off.ring_entries;
-	cring->cqes = ptr + p.cq_off.cqes;
-	cq_ring_mask = *cring->ring_mask;
-	return 0;
-}
-
-static void file_depths(char *buf)
-{
-	bool prev = false;
-	char *p;
-	int i, j;
-
-	buf[0] = '\0';
-	p = buf;
-	for (j = 0; j < nthreads; j++) {
-		struct submitter *s = get_submitter(j);
-
-		for (i = 0; i < s->nr_files; i++) {
-			struct file *f = &s->files[i];
-
-			if (prev)
-				p += sprintf(p, " %d", f->pending_ios);
-			else
-				p += sprintf(p, "%d", f->pending_ios);
-			prev = true;
-		}
-	}
-}
-
 static void usage(char *argv, int status)
 {
 	char runtime_str[16];
@@ -1218,11 +1305,12 @@ static void usage(char *argv, int status)
 		" -R <bool> : Use random IO, default %d\n"
 		" -a <bool> : Use legacy aio, default %d\n"
 		" -S <bool> : Use sync IO (preadv2), default %d\n"
-		" -X <bool> : Use registered ring %d\n",
+		" -X <bool> : Use registered ring %d\n"
+		" -P <bool> : Automatically place on device home node %d\n",
 		argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
 		fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
 		stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio,
-		use_sync, register_ring);
+		use_sync, register_ring, numa_placement);
 	exit(status);
 }
 
@@ -1274,16 +1362,14 @@ int main(int argc, char *argv[])
 {
 	struct submitter *s;
 	unsigned long done, calls, reap;
-	int err, i, j, flags, fd, opt, threads_per_f, threads_rem = 0, nfiles;
-	long page_size;
+	int i, j, flags, fd, opt, threads_per_f, threads_rem = 0, nfiles;
 	struct file f;
-	char *fdepths;
 	void *ret;
 
 	if (!do_nop && argc < 2)
 		usage(argv[0], 1);
 
-	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:h?")) != -1) {
+	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:P:h?")) != -1) {
 		switch (opt) {
 		case 'a':
 			aio = !!atoi(optarg);
@@ -1361,6 +1447,9 @@ int main(int argc, char *argv[])
 			exit(1);
 #endif
 			break;
+		case 'P':
+			numa_placement = !!atoi(optarg);
+			break;
 		case 'h':
 		case '?':
 		default:
@@ -1383,6 +1472,7 @@ int main(int argc, char *argv[])
 				roundup_pow2(depth) * sizeof(struct iovec));
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
+		s->numa_node = -1;
 		s->index = j;
 		s->done = s->calls = s->reaps = 0;
 	}
@@ -1440,7 +1530,10 @@ int main(int argc, char *argv[])
 
 			memcpy(&s->files[s->nr_files], &f, sizeof(f));
 
-			printf("Added file %s (submitter %d)\n", argv[i], s->index);
+			if (numa_placement)
+				detect_node(s, argv[i]);
+
+			s->filename = argv[i];
 			s->nr_files++;
 		}
 		threads_rem--;
@@ -1454,43 +1547,6 @@ int main(int argc, char *argv[])
 	if (page_size < 0)
 		page_size = 4096;
 
-	for (j = 0; j < nthreads; j++) {
-		s = get_submitter(j);
-		for (i = 0; i < roundup_pow2(depth); i++) {
-			void *buf;
-
-			if (posix_memalign(&buf, page_size, bs)) {
-				printf("failed alloc\n");
-				return 1;
-			}
-			s->iovecs[i].iov_base = buf;
-			s->iovecs[i].iov_len = bs;
-		}
-	}
-
-	for (j = 0; j < nthreads; j++) {
-		s = get_submitter(j);
-
-		if (use_sync)
-			continue;
-		else if (!aio)
-			err = setup_ring(s);
-		else
-			err = setup_aio(s);
-		if (err) {
-			printf("ring setup failed: %s, %d\n", strerror(errno), err);
-			return 1;
-		}
-	}
-	s = get_submitter(0);
-	printf("polled=%d, fixedbufs=%d/%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, dma_map, register_files, buffered, depth);
-	if (use_sync)
-		printf("Engine=preadv2\n");
-	else if (!aio)
-		printf("Engine=io_uring, sq_ring=%d, cq_ring=%d\n", *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
-	else
-		printf("Engine=aio\n");
-
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
 		if (use_sync)
@@ -1503,7 +1559,6 @@ int main(int argc, char *argv[])
 #endif
 	}
 
-	fdepths = malloc(8 * s->nr_files * nthreads);
 	reap = calls = done = 0;
 	do {
 		unsigned long this_done = 0;
@@ -1535,16 +1590,20 @@ int main(int argc, char *argv[])
 			ipc = (this_reap - reap) / (this_call - calls);
 		} else
 			rpc = ipc = -1;
-		file_depths(fdepths);
 		iops = this_done - done;
 		if (bs > 1048576)
 			bw = iops * (bs / 1048576);
 		else
 			bw = iops / (1048576 / bs);
-		if (iops > 100000)
-			printf("IOPS=%luK, ", iops / 1000);
-		else
+		if (iops > 1000000) {
+			double miops = (double) iops / 1000000.0;
+			printf("IOPS=%.2fM, ", miops);
+		} else if (iops > 100000) {
+			double kiops = (double) iops / 1000.0;
+			printf("IOPS=%.2fK, ", kiops);
+		} else {
 			printf("IOPS=%lu, ", iops);
+		}
 		max_iops = max(max_iops, iops);
 		if (!do_nop) {
 			if (bw > 2000) {
@@ -1555,7 +1614,7 @@ int main(int argc, char *argv[])
 				printf("BW=%luMiB/s, ", bw);
 			}
 		}
-		printf("IOS/call=%ld/%ld, inflight=(%s)\n", rpc, ipc, fdepths);
+		printf("IOS/call=%ld/%ld\n", rpc, ipc);
 		done = this_done;
 		calls = this_call;
 		reap = this_reap;
@@ -1578,7 +1637,6 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	free(fdepths);
 	free(submitter);
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-08-01 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-08-01 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 3e1d3f2fc4a5f09174f0d6d70d036285d69f17c2:

  .github: add pull request template (2022-07-28 11:00:04 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 55037c4839c65612fa388ae937e63661d8192ed9:

  t/io_uring: switch to GiB/sec if numbers get large (2022-07-31 12:06:12 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      t/io_uring: switch to GiB/sec if numbers get large

 t/io_uring.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/t/io_uring.c b/t/io_uring.c
index 10035912..335a06ed 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -1546,8 +1546,15 @@ int main(int argc, char *argv[])
 		else
 			printf("IOPS=%lu, ", iops);
 		max_iops = max(max_iops, iops);
-		if (!do_nop)
-			printf("BW=%luMiB/s, ", bw);
+		if (!do_nop) {
+			if (bw > 2000) {
+				double bw_g = (double) bw / 1000.0;
+
+				printf("BW=%.2fGiB/s, ", bw_g);
+			} else {
+				printf("BW=%luMiB/s, ", bw);
+			}
+		}
 		printf("IOS/call=%ld/%ld, inflight=(%s)\n", rpc, ipc, fdepths);
 		done = this_done;
 		calls = this_call;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-29 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-29 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 5b99196735a245224ec9321f796a9da30654ae6c:

  README: add maintainer section (2022-07-27 21:04:31 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 3e1d3f2fc4a5f09174f0d6d70d036285d69f17c2:

  .github: add pull request template (2022-07-28 11:00:04 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      .github: add pull request template

 .github/PULL_REQUEST_TEMPLATE.md | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md

---

Diff of recent changes:

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000..4d98a694
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,8 @@
+Please confirm that your commit message(s) follow these guidelines:
+
+1. First line is a commit title, a descriptive one-liner for the change
+2. Empty second line
+3. Commit message body that explains why the change is useful. Break lines that
+   aren't something like a URL at 72-74 chars.
+4. Empty line
+5. Signed-off-by: Real Name <real@email.com>

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-28 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-28 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit dff32ddb97f2257975b6047474d665a5de7f7bbc:

  ci: install libnfs for linux and macos builds (2022-07-22 15:57:27 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5b99196735a245224ec9321f796a9da30654ae6c:

  README: add maintainer section (2022-07-27 21:04:31 -0600)

----------------------------------------------------------------
Chris Weber (1):
      Fix multithread issues when operating on a single shared file

Jens Axboe (3):
      Merge branch 'proposed_fix' of https://github.com/weberc-ntap/fio
      Minor style fixups
      README: add maintainer section

 README.rst  | 11 +++++++++++
 backend.c   | 19 ++++++++++++++++++-
 file.h      |  1 +
 filesetup.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/README.rst b/README.rst
index 4d736eaf..67420903 100644
--- a/README.rst
+++ b/README.rst
@@ -81,6 +81,17 @@ benchmark/test tools out there weren't flexible enough to do what he wanted.
 Jens Axboe <axboe@kernel.dk> 20060905
 
 
+Maintainers
+-----------
+
+Fio is maintained by Jens Axboe <axboe@kernel.dk> and
+Vincent Fu <vincentfu@gmail.com> - however, for reporting bugs please use
+the fio reflector or the GitHub page rather than email any of them
+directly. By using the public resources, others will be able to learn from
+the responses too. Chances are also good that other members will be able to
+help with your inquiry as well.
+
+
 Binary packages
 ---------------
 
diff --git a/backend.c b/backend.c
index e5bb4e25..5159b60d 100644
--- a/backend.c
+++ b/backend.c
@@ -2314,8 +2314,25 @@ static void run_threads(struct sk_out *sk_out)
 	for_each_td(td, i) {
 		print_status_init(td->thread_number - 1);
 
-		if (!td->o.create_serialize)
+		if (!td->o.create_serialize) {
+			/*
+			 *  When operating on a single file in parallel,
+			 *  perform single-threaded early setup so that
+			 *  setup_files() does not run into issues
+			 *  later.
+			*/
+			if (!i && td->o.nr_files == 1) {
+				if (setup_shared_file(td)) {
+					exit_value++;
+					if (td->error)
+						log_err("fio: pid=%d, err=%d/%s\n",
+							(int) td->pid, td->error, td->verror);
+					td_set_runstate(td, TD_REAPED);
+					todo--;
+				}
+			}
 			continue;
+		}
 
 		if (fio_verify_load_state(td))
 			goto reap;
diff --git a/file.h b/file.h
index da1b8947..e646cf22 100644
--- a/file.h
+++ b/file.h
@@ -201,6 +201,7 @@ struct thread_data;
 extern void close_files(struct thread_data *);
 extern void close_and_free_files(struct thread_data *);
 extern uint64_t get_start_offset(struct thread_data *, struct fio_file *);
+extern int __must_check setup_shared_file(struct thread_data *);
 extern int __must_check setup_files(struct thread_data *);
 extern int __must_check file_invalidate_cache(struct thread_data *, struct fio_file *);
 #ifdef __cplusplus
diff --git a/filesetup.c b/filesetup.c
index ab6c488b..e0592209 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -143,7 +143,7 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 	if (unlink_file || new_layout) {
 		int ret;
 
-		dprint(FD_FILE, "layout unlink %s\n", f->file_name);
+		dprint(FD_FILE, "layout %d unlink %d %s\n", new_layout, unlink_file, f->file_name);
 
 		ret = td_io_unlink_file(td, f);
 		if (ret != 0 && ret != ENOENT) {
@@ -198,6 +198,9 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 		}
 	}
 
+
+	dprint(FD_FILE, "fill file %s, size %llu\n", f->file_name, (unsigned long long) f->real_file_size);
+
 	left = f->real_file_size;
 	bs = td->o.max_bs[DDIR_WRITE];
 	if (bs > left)
@@ -1078,6 +1081,44 @@ static bool create_work_dirs(struct thread_data *td, const char *fname)
 	return true;
 }
 
+int setup_shared_file(struct thread_data *td)
+{
+	struct fio_file *f;
+	uint64_t file_size;
+	int err = 0;
+
+	if (td->o.nr_files > 1) {
+		log_err("fio: shared file setup called for multiple files\n");
+		return -1;
+	}
+
+	get_file_sizes(td);
+
+	f = td->files[0];
+
+	if (f == NULL) {
+		log_err("fio: NULL shared file\n");
+		return -1;
+	}
+
+	file_size = thread_number * td->o.size;
+	dprint(FD_FILE, "shared setup %s real_file_size=%llu, desired=%llu\n", 
+			f->file_name, (unsigned long long)f->real_file_size, (unsigned long long)file_size);
+
+	if (f->real_file_size < file_size) {
+		dprint(FD_FILE, "fio: extending shared file\n");
+		f->real_file_size = file_size;
+		err = extend_file(td, f);
+		if (!err)
+			err = __file_invalidate_cache(td, f, 0, f->real_file_size);
+		get_file_sizes(td);
+		dprint(FD_FILE, "shared setup new real_file_size=%llu\n", 
+				(unsigned long long)f->real_file_size);
+	}
+
+	return err;
+}
+
 /*
  * Open the files and setup files sizes, creating files if necessary.
  */
@@ -1092,7 +1133,7 @@ int setup_files(struct thread_data *td)
 	const unsigned long long bs = td_min_bs(td);
 	uint64_t fs = 0;
 
-	dprint(FD_FILE, "setup files\n");
+	dprint(FD_FILE, "setup files (thread_number=%d, subjob_number=%d)\n", td->thread_number, td->subjob_number);
 
 	old_state = td_bump_runstate(td, TD_SETTING_UP);
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-23 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-23 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 02a36caa69f5675f7144fbeddb7a32e1d35ce0c7:

  docs: clarify write_iolog description (2022-07-21 15:18:18 -0400)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to dff32ddb97f2257975b6047474d665a5de7f7bbc:

  ci: install libnfs for linux and macos builds (2022-07-22 15:57:27 -0400)

----------------------------------------------------------------
Vincent Fu (3):
      configure: cleanups for nfs ioengine
      engines/nfs: remove commit hook
      ci: install libnfs for linux and macos builds

 ci/actions-install.sh |  3 ++-
 configure             | 16 +++++++---------
 engines/nfs.c         |  9 ---------
 options.c             |  2 +-
 4 files changed, 10 insertions(+), 20 deletions(-)

---

Diff of recent changes:

diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index ff514926..b5c4198f 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -26,6 +26,7 @@ DPKGCFG
         libibverbs-dev
         libnuma-dev
         librdmacm-dev
+	libnfs-dev
         valgrind
     )
     case "${CI_TARGET_ARCH}" in
@@ -78,7 +79,7 @@ install_macos() {
     #echo "Updating homebrew..."
     #brew update >/dev/null 2>&1
     echo "Installing packages..."
-    HOMEBREW_NO_AUTO_UPDATE=1 brew install cunit
+    HOMEBREW_NO_AUTO_UPDATE=1 brew install cunit libnfs
     pip3 install scipy six sphinx
 }
 
diff --git a/configure b/configure
index 7965f0b0..36450df8 100755
--- a/configure
+++ b/configure
@@ -170,7 +170,7 @@ disable_native="no"
 march_set="no"
 libiscsi="no"
 libnbd="no"
-libnfs="no"
+libnfs=""
 xnvme=""
 libzbc=""
 dfs=""
@@ -245,6 +245,8 @@ for opt do
   ;;
   --disable-tcmalloc) disable_tcmalloc="yes"
   ;;
+  --disable-libnfs) libnfs="no"
+  ;;
   --enable-libnfs) libnfs="yes"
   ;;
   --dynamic-libengines) dynamic_engines="yes"
@@ -282,6 +284,7 @@ if test "$show_help" = "yes" ; then
   echo "--disable-gfapi         Disable gfapi"
   echo "--enable-libhdfs        Enable hdfs support"
   echo "--enable-libnfs         Enable nfs support"
+  echo "--disable-libnfs        Disable nfs support"
   echo "--disable-lex           Disable use of lex/yacc for math"
   echo "--disable-pmem          Disable pmem based engines even if found"
   echo "--enable-lex            Enable use of lex/yacc for math"
@@ -2313,15 +2316,14 @@ print_config "DAOS File System (dfs) Engine" "$dfs"
 
 ##########################################
 # Check if we have libnfs (for userspace nfs support).
-if test "$libnfs" = "yes" ; then
+if test "$libnfs" != "no" ; then
   if $(pkg-config libnfs > /dev/null 2>&1); then
     libnfs="yes"
     libnfs_cflags=$(pkg-config --cflags libnfs)
-    # libnfs_libs=$(pkg-config --libs libnfs)
-    libnfs_libs=/usr/local/lib/libnfs.a
+    libnfs_libs=$(pkg-config --libs libnfs)
   else
     if test "$libnfs" = "yes" ; then
-      echo "libnfs" "Install libnfs"
+      feature_not_found "libnfs" "libnfs"
     fi
     libnfs="no"
   fi
@@ -3190,9 +3192,6 @@ fi
 if test "$dfs" = "yes" ; then
   output_sym "CONFIG_DFS"
 fi
-if test "$libnfs" = "yes" ; then
-  output_sym "CONFIG_NFS"
-fi
 if test "$march_set" = "no" && test "$build_native" = "yes" ; then
   output_sym "CONFIG_BUILD_NATIVE"
 fi
@@ -3234,7 +3233,6 @@ if test "$libnbd" = "yes" ; then
 fi
 if test "$libnfs" = "yes" ; then
   output_sym "CONFIG_LIBNFS"
-  echo "CONFIG_LIBNFS=m" >> $config_host_mak
   echo "LIBNFS_CFLAGS=$libnfs_cflags" >> $config_host_mak
   echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak
 fi
diff --git a/engines/nfs.c b/engines/nfs.c
index 21be8833..7031769d 100644
--- a/engines/nfs.c
+++ b/engines/nfs.c
@@ -279,14 +279,6 @@ static int fio_libnfs_close(struct thread_data *td, struct fio_file *f)
 	return ret;
 }
 
-/*
- * Hook for writing out outstanding data.
- */
-static int fio_libnfs_commit(struct thread_data *td) {
-	nfs_event_loop(td, true);
-	return 0;
-}
-
 struct ioengine_ops ioengine = {
 	.name		= "nfs",
 	.version	= FIO_IOOPS_VERSION,
@@ -297,7 +289,6 @@ struct ioengine_ops ioengine = {
 	.cleanup	= fio_libnfs_cleanup,
 	.open_file	= fio_libnfs_open,
 	.close_file	= fio_libnfs_close,
-	.commit     = fio_libnfs_commit,
 	.flags      = FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
 	.options	= options,
 	.option_struct_size	= sizeof(struct fio_libnfs_options),
diff --git a/options.c b/options.c
index 2b183c60..5d3daedf 100644
--- a/options.c
+++ b/options.c
@@ -2140,7 +2140,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 			    .help = "DAOS File System (dfs) IO engine",
 			  },
 #endif
-#ifdef CONFIG_NFS
+#ifdef CONFIG_LIBNFS
 			  { .ival = "nfs",
 			    .help = "NFS IO engine",
 			  },

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-22 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-22 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 9c1c1a8d6a4f30eba9595da951d18db1685c03d8:

  engines/http: silence openssl 3.0 deprecation warnings (2022-07-19 13:21:19 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 02a36caa69f5675f7144fbeddb7a32e1d35ce0c7:

  docs: clarify write_iolog description (2022-07-21 15:18:18 -0400)

----------------------------------------------------------------
Vincent Fu (1):
      docs: clarify write_iolog description

 HOWTO.rst | 3 ++-
 fio.1     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 470777e2..104cce2d 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -3049,7 +3049,8 @@ I/O replay
 
 	Write the issued I/O patterns to the specified file. See
 	:option:`read_iolog`.  Specify a separate file for each job, otherwise the
-	iologs will be interspersed and the file may be corrupt.
+        iologs will be interspersed and the file may be corrupt. This file will
+        be opened in append mode.
 
 .. option:: read_iolog=str
 
diff --git a/fio.1 b/fio.1
index 948c01f9..ce9bf3ef 100644
--- a/fio.1
+++ b/fio.1
@@ -2793,7 +2793,8 @@ of milliseconds. Defaults to 1000.
 .BI write_iolog \fR=\fPstr
 Write the issued I/O patterns to the specified file. See
 \fBread_iolog\fR. Specify a separate file for each job, otherwise the
-iologs will be interspersed and the file may be corrupt.
+iologs will be interspersed and the file may be corrupt. This file will be
+opened in append mode.
 .TP
 .BI read_iolog \fR=\fPstr
 Open an iolog with the specified filename and replay the I/O patterns it

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-20 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-20 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d6225c1550827077c0c0f9e1b8816b4f35cd5304:

  Update README.rst to specify secure protocols where possible (2022-07-11 07:53:29 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 9c1c1a8d6a4f30eba9595da951d18db1685c03d8:

  engines/http: silence openssl 3.0 deprecation warnings (2022-07-19 13:21:19 -0600)

----------------------------------------------------------------
Giuseppe Baccini (1):
      Fixed misplaced goto in http.c

Jens Axboe (1):
      engines/http: silence openssl 3.0 deprecation warnings

Vincent Fu (1):
      Merge branch 'giubacc-misplaced-goto'

 engines/http.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/engines/http.c b/engines/http.c
index 696febe1..1de9e66c 100644
--- a/engines/http.c
+++ b/engines/http.c
@@ -29,6 +29,10 @@
 #include "fio.h"
 #include "../optgroup.h"
 
+/*
+ * Silence OpenSSL 3.0 deprecated function warnings
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 
 enum {
 	FIO_HTTP_WEBDAV	    = 0,
@@ -526,8 +530,8 @@ static enum fio_q_status fio_http_queue(struct thread_data *td,
 			if (status == 100 || (status >= 200 && status <= 204))
 				goto out;
 			log_err("DDIR_WRITE failed with HTTP status code %ld\n", status);
-			goto err;
 		}
+		goto err;
 	} else if (io_u->ddir == DDIR_READ) {
 		curl_easy_setopt(http->curl, CURLOPT_READDATA, NULL);
 		curl_easy_setopt(http->curl, CURLOPT_WRITEDATA, &_curl_stream);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-12 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-12 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 30568e0ed9366a810dfcf90a903ecfbff1a6196c:

  Merge branch 'client-hist-le64' of https://github.com/tuan-hoang1/fio (2022-07-07 06:33:25 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d6225c1550827077c0c0f9e1b8816b4f35cd5304:

  Update README.rst to specify secure protocols where possible (2022-07-11 07:53:29 -0600)

----------------------------------------------------------------
Rebecca Cran (1):
      Update README.rst to specify secure protocols where possible

 README.rst | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

---

Diff of recent changes:

diff --git a/README.rst b/README.rst
index 527f33ab..4d736eaf 100644
--- a/README.rst
+++ b/README.rst
@@ -27,31 +27,20 @@ Source
 
 Fio resides in a git repo, the canonical place is:
 
-	git://git.kernel.dk/fio.git
-
-When inside a corporate firewall, git:// URL sometimes does not work.
-If git:// does not work, use the http protocol instead:
-
-	http://git.kernel.dk/fio.git
+	https://git.kernel.dk/cgit/fio/
 
 Snapshots are frequently generated and :file:`fio-git-*.tar.gz` include the git
 meta data as well. Other tarballs are archives of official fio releases.
 Snapshots can download from:
 
-	http://brick.kernel.dk/snaps/
+	https://brick.kernel.dk/snaps/
 
 There are also two official mirrors. Both of these are automatically synced with
 the main repository, when changes are pushed. If the main repo is down for some
 reason, either one of these is safe to use as a backup:
 
-	git://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
-
 	https://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
 
-or
-
-	git://github.com/axboe/fio.git
-
 	https://github.com/axboe/fio.git
 
 
@@ -70,7 +59,7 @@ email to majordomo@vger.kernel.org with
 
 in the body of the email. Archives can be found here:
 
-	http://www.spinics.net/lists/fio/
+	https://www.spinics.net/lists/fio/
 
 or here:
 
@@ -97,12 +86,12 @@ Binary packages
 
 Debian:
 	Starting with Debian "Squeeze", fio packages are part of the official
-	Debian repository. http://packages.debian.org/search?keywords=fio .
+	Debian repository. https://packages.debian.org/search?keywords=fio .
 
 Ubuntu:
 	Starting with Ubuntu 10.04 LTS (aka "Lucid Lynx"), fio packages are part
 	of the Ubuntu "universe" repository.
-	http://packages.ubuntu.com/search?keywords=fio .
+	https://packages.ubuntu.com/search?keywords=fio .
 
 Red Hat, Fedora, CentOS & Co:
 	Starting with Fedora 9/Extra Packages for Enterprise Linux 4, fio
@@ -176,7 +165,7 @@ directory.
 
 How to compile fio on 64-bit Windows:
 
- 1. Install Cygwin (http://www.cygwin.com/). Install **make** and all
+ 1. Install Cygwin (https://www.cygwin.com/). Install **make** and all
     packages starting with **mingw64-x86_64**. Ensure
     **mingw64-x86_64-zlib** are installed if you wish
     to enable fio's log compression functionality.
@@ -205,8 +194,8 @@ browser to :file:`./doc/output/html/index.html`.  To build manual page run
 ``make -C doc man`` and then ``man doc/output/man/fio.1``.  To see what other
 output formats are supported run ``make -C doc help``.
 
-.. _reStructuredText: http://www.sphinx-doc.org/rest.html
-.. _Sphinx: http://www.sphinx-doc.org
+.. _reStructuredText: https://www.sphinx-doc.org/rest.html
+.. _Sphinx: https://www.sphinx-doc.org
 
 
 Platforms

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-08 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-08 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 1f43cc2e7b2f3ac7461f8ea66bb9b32cb03075c3:

  Merge branch 'server-hist-le64' of https://github.com/tuan-hoang1/fio (2022-07-06 16:38:07 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 30568e0ed9366a810dfcf90a903ecfbff1a6196c:

  Merge branch 'client-hist-le64' of https://github.com/tuan-hoang1/fio (2022-07-07 06:33:25 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'client-hist-le64' of https://github.com/tuan-hoang1/fio

Tuan Hoang (1):
      client: only do le64_to_cpu() on io_sample_data member if iolog is histogram

 client.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/client.c b/client.c
index 605a3ce5..37da74bc 100644
--- a/client.c
+++ b/client.c
@@ -1702,7 +1702,8 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
 			s = (struct io_sample *)((char *)s + sizeof(struct io_u_plat_entry) * i);
 
 		s->time		= le64_to_cpu(s->time);
-		s->data.val	= le64_to_cpu(s->data.val);
+		if (ret->log_type != IO_LOG_TYPE_HIST)
+			s->data.val	= le64_to_cpu(s->data.val);
 		s->__ddir	= __le32_to_cpu(s->__ddir);
 		s->bs		= le64_to_cpu(s->bs);
 		s->priority	= le16_to_cpu(s->priority);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-07 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-07 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 1eb5ca76ee17ff80dd06a0c2d22498ab720ec76f:

  configure: revert NFS configure change (2022-07-05 07:19:39 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1f43cc2e7b2f3ac7461f8ea66bb9b32cb03075c3:

  Merge branch 'server-hist-le64' of https://github.com/tuan-hoang1/fio (2022-07-06 16:38:07 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'server-hist-le64' of https://github.com/tuan-hoang1/fio

Tuan Hoang (1):
      server: only do cpu_to_le64() on io_sample_data member if iolog is histogram

 server.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/server.c b/server.c
index 4c71bd44..b453be5f 100644
--- a/server.c
+++ b/server.c
@@ -2284,7 +2284,8 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
 			struct io_sample *s = get_sample(log, cur_log, i);
 
 			s->time		= cpu_to_le64(s->time);
-			s->data.val	= cpu_to_le64(s->data.val);
+			if (log->log_type != IO_LOG_TYPE_HIST)
+				s->data.val	= cpu_to_le64(s->data.val);
 			s->__ddir	= __cpu_to_le32(s->__ddir);
 			s->bs		= cpu_to_le64(s->bs);
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-06 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-06 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit dc4729e3ef6a9116d7cd30e96e4f5863883e5bd7:

  hash: cleanups (2022-07-01 15:03:39 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1eb5ca76ee17ff80dd06a0c2d22498ab720ec76f:

  configure: revert NFS configure change (2022-07-05 07:19:39 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      configure: revert NFS configure change

 configure | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 04a1d0e2..7965f0b0 100755
--- a/configure
+++ b/configure
@@ -245,7 +245,7 @@ for opt do
   ;;
   --disable-tcmalloc) disable_tcmalloc="yes"
   ;;
-  --disable-nfs) disable_nfs="yes"
+  --enable-libnfs) libnfs="yes"
   ;;
   --dynamic-libengines) dynamic_engines="yes"
   ;;
@@ -279,7 +279,6 @@ if test "$show_help" = "yes" ; then
   echo "--disable-rados         Disable Rados support even if found"
   echo "--disable-rbd           Disable Rados Block Device even if found"
   echo "--disable-http          Disable HTTP support even if found"
-  echo "--disable-nfs           Disable userspace NFS support even if found"
   echo "--disable-gfapi         Disable gfapi"
   echo "--enable-libhdfs        Enable hdfs support"
   echo "--enable-libnfs         Enable nfs support"
@@ -2314,15 +2313,17 @@ print_config "DAOS File System (dfs) Engine" "$dfs"
 
 ##########################################
 # Check if we have libnfs (for userspace nfs support).
-if test "$disable_nfs" != "yes"; then
+if test "$libnfs" = "yes" ; then
   if $(pkg-config libnfs > /dev/null 2>&1); then
     libnfs="yes"
     libnfs_cflags=$(pkg-config --cflags libnfs)
-    libnfs_libs=$(pkg-config --libs libnfs)
+    # libnfs_libs=$(pkg-config --libs libnfs)
+    libnfs_libs=/usr/local/lib/libnfs.a
   else
     if test "$libnfs" = "yes" ; then
       echo "libnfs" "Install libnfs"
     fi
+    libnfs="no"
   fi
 fi
 print_config "NFS engine" "$libnfs"

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-07-02 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-07-02 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 660879102e32a0ed3d3225afaebcc0d46625a4a6:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-06-23 08:20:22 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to dc4729e3ef6a9116d7cd30e96e4f5863883e5bd7:

  hash: cleanups (2022-07-01 15:03:39 -0600)

----------------------------------------------------------------
Georg Sauthoff (1):
      Simplify and optimize __fill_random_buf

Jens Axboe (3):
      Merge branch 'fill-random-smaller' of https://github.com/gsauthof/fio
      lib/rand: improve __fill_random_buf()
      hash: cleanups

 engines/rdma.c |  2 +-
 hash.h         | 26 --------------------------
 lib/rand.c     | 30 +++++++++---------------------
 3 files changed, 10 insertions(+), 48 deletions(-)

---

Diff of recent changes:

diff --git a/engines/rdma.c b/engines/rdma.c
index e3bb2567..fcb41068 100644
--- a/engines/rdma.c
+++ b/engines/rdma.c
@@ -1389,7 +1389,7 @@ static int fio_rdmaio_setup(struct thread_data *td)
 		rd = malloc(sizeof(*rd));
 
 		memset(rd, 0, sizeof(*rd));
-		init_rand_seed(&rd->rand_state, (unsigned int) GOLDEN_RATIO_PRIME, 0);
+		init_rand_seed(&rd->rand_state, (unsigned int) GOLDEN_RATIO_64, 0);
 		td->io_ops_data = rd;
 	}
 
diff --git a/hash.h b/hash.h
index f7596a56..51f0706e 100644
--- a/hash.h
+++ b/hash.h
@@ -9,32 +9,6 @@
    (C) 2002 William Lee Irwin III, IBM */
 
 /*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-
-#if BITS_PER_LONG == 32
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e370001UL
-#elif BITS_PER_LONG == 64
-/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL
-#else
-#error Define GOLDEN_RATIO_PRIME for your wordsize.
-#endif
-
-/*
- * The above primes are actively bad for hashing, since they are
- * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
- * real problems. Besides, the "prime" part is pointless for the
- * multiplicative hash.
- *
  * Although a random odd number will do, it turns out that the golden
  * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
  * properties.
diff --git a/lib/rand.c b/lib/rand.c
index 6e893e80..1e669116 100644
--- a/lib/rand.c
+++ b/lib/rand.c
@@ -97,29 +97,17 @@ void init_rand_seed(struct frand_state *state, uint64_t seed, bool use64)
 
 void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
 {
-	void *ptr = buf;
+	uint64_t *b = buf;
+	uint64_t *e = b  + len / sizeof(*b);
+	unsigned int rest = len % sizeof(*b);
 
-	while (len) {
-		int this_len;
-
-		if (len >= sizeof(int64_t)) {
-			*((int64_t *) ptr) = seed;
-			this_len = sizeof(int64_t);
-		} else if (len >= sizeof(int32_t)) {
-			*((int32_t *) ptr) = seed;
-			this_len = sizeof(int32_t);
-		} else if (len >= sizeof(int16_t)) {
-			*((int16_t *) ptr) = seed;
-			this_len = sizeof(int16_t);
-		} else {
-			*((int8_t *) ptr) = seed;
-			this_len = sizeof(int8_t);
-		}
-		ptr += this_len;
-		len -= this_len;
-		seed *= GOLDEN_RATIO_PRIME;
-		seed >>= 3;
+	for (; b != e; ++b) {
+		*b = seed;
+		seed = __hash_u64(seed);
 	}
+
+	if (fio_unlikely(rest))
+		__builtin_memcpy(e, &seed, rest);
 }
 
 uint64_t fill_random_buf(struct frand_state *fs, void *buf,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-24 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-24 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6aaebfbe7269f95164ac83a04505869f96f5f83a:

  configure: add option to disable xnvme build (2022-06-22 11:45:32 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 660879102e32a0ed3d3225afaebcc0d46625a4a6:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-06-23 08:20:22 -0600)

----------------------------------------------------------------
Bart Van Assche (2):
      ci/travis-*: Fix shellcheck warnings
      ci: Verify the Android build

Jens Axboe (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 .github/workflows/ci.yml     |  5 +++++
 ci/actions-build.sh          | 19 +++++++++++++++++--
 ci/actions-full-test.sh      |  2 ++
 ci/actions-install.sh        |  7 +++++++
 ci/actions-smoke-test.sh     |  2 ++
 ci/travis-install-librpma.sh |  6 +++---
 ci/travis-install-pmdk.sh    |  9 +++++----
 7 files changed, 41 insertions(+), 9 deletions(-)

---

Diff of recent changes:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cd8ce142..650366b2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,6 +15,7 @@ jobs:
         - linux-clang
         - macos
         - linux-i686-gcc
+        - android
         include:
         - build: linux-gcc
           os: ubuntu-20.04
@@ -27,8 +28,12 @@ jobs:
         - build: linux-i686-gcc
           os: ubuntu-20.04
           arch: i686
+        - build: android
+          os: ubuntu-20.04
+          arch: aarch64-linux-android32
 
     env:
+      CI_TARGET_BUILD: ${{ matrix.build }}
       CI_TARGET_ARCH: ${{ matrix.arch }}
       CC: ${{ matrix.cc }}
 
diff --git a/ci/actions-build.sh b/ci/actions-build.sh
index 74a6fdcb..2b3de8e3 100755
--- a/ci/actions-build.sh
+++ b/ci/actions-build.sh
@@ -11,8 +11,23 @@ main() {
     local configure_flags=()
 
     set_ci_target_os
-    case "${CI_TARGET_OS}" in
-        "linux")
+    case "${CI_TARGET_BUILD}/${CI_TARGET_OS}" in
+        android/*)
+            export UNAME=Android
+            if [ -z "${CI_TARGET_ARCH}" ]; then
+                echo "Error: CI_TARGET_ARCH has not been set"
+                return 1
+            fi
+            NDK=$PWD/android-ndk-r24/toolchains/llvm/prebuilt/linux-x86_64/bin
+            export PATH="${NDK}:${PATH}"
+            export LIBS="-landroid"
+            CC=${NDK}/${CI_TARGET_ARCH}-clang
+            if [ ! -e "${CC}" ]; then
+                echo "Error: could not find ${CC}"
+                return 1
+            fi
+            ;;
+        */linux)
             case "${CI_TARGET_ARCH}" in
                 "i686")
                     extra_cflags="${extra_cflags} -m32"
diff --git a/ci/actions-full-test.sh b/ci/actions-full-test.sh
index 8282002f..d1675f6e 100755
--- a/ci/actions-full-test.sh
+++ b/ci/actions-full-test.sh
@@ -3,6 +3,8 @@
 set -eu
 
 main() {
+    [ "${CI_TARGET_BUILD}" = android ] && return 0
+
     echo "Running long running tests..."
     export PYTHONUNBUFFERED="TRUE"
     if [[ "${CI_TARGET_ARCH}" == "arm64" ]]; then
diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index 0e472717..ff514926 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -83,6 +83,13 @@ install_macos() {
 }
 
 main() {
+    if [ "${CI_TARGET_BUILD}" = "android" ]; then
+	echo "Installing Android NDK..."
+	wget --quiet https://dl.google.com/android/repository/android-ndk-r24-linux.zip
+	unzip -q android-ndk-r24-linux.zip
+	return 0
+    fi
+
     set_ci_target_os
 
     install_function="install_${CI_TARGET_OS}"
diff --git a/ci/actions-smoke-test.sh b/ci/actions-smoke-test.sh
index c129c89f..3196f6a1 100755
--- a/ci/actions-smoke-test.sh
+++ b/ci/actions-smoke-test.sh
@@ -3,6 +3,8 @@
 set -eu
 
 main() {
+    [ "${CI_TARGET_BUILD}" = "android" ] && return 0
+
     echo "Running smoke tests..."
     make test
 }
diff --git a/ci/travis-install-librpma.sh b/ci/travis-install-librpma.sh
index b127f3f5..4e5ed21d 100755
--- a/ci/travis-install-librpma.sh
+++ b/ci/travis-install-librpma.sh
@@ -16,7 +16,7 @@ cmake .. -DCMAKE_BUILD_TYPE=Release \
 	-DBUILD_DOC=OFF \
 	-DBUILD_EXAMPLES=OFF \
 	-DBUILD_TESTS=OFF
-make -j$(nproc)
-sudo make -j$(nproc) install
-cd $WORKDIR
+make -j"$(nproc)"
+sudo make -j"$(nproc)" install
+cd "$WORKDIR"
 rm -rf $ZIP_FILE rpma-${LIBRPMA_VERSION}
diff --git a/ci/travis-install-pmdk.sh b/ci/travis-install-pmdk.sh
index 3b0b5bbc..7bde9fd0 100755
--- a/ci/travis-install-pmdk.sh
+++ b/ci/travis-install-pmdk.sh
@@ -12,7 +12,8 @@ WORKDIR=$(pwd)
 #    /bin/sh: 1: clang: not found
 # if CC is not set to the full path of clang.
 #
-export CC=$(type -P $CC)
+CC=$(type -P "$CC")
+export CC
 
 # Install PMDK libraries, because PMDK's libpmem
 # is a dependency of the librpma fio engine.
@@ -22,7 +23,7 @@ export CC=$(type -P $CC)
 wget https://github.com/pmem/pmdk/releases/download/${PMDK_VERSION}/pmdk-${PMDK_VERSION}.tar.gz
 tar -xzf pmdk-${PMDK_VERSION}.tar.gz
 cd pmdk-${PMDK_VERSION}
-make -j$(nproc) NDCTL_ENABLE=n
-sudo make -j$(nproc) install prefix=/usr NDCTL_ENABLE=n
-cd $WORKDIR
+make -j"$(nproc)" NDCTL_ENABLE=n
+sudo make -j"$(nproc)" install prefix=/usr NDCTL_ENABLE=n
+cd "$WORKDIR"
 rm -rf pmdk-${PMDK_VERSION}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-23 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-23 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d4bf5e6193b97c5e5490fdb93b069d149a38777c:

  gettime: fix whitespace damage (2022-06-19 12:04:19 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6aaebfbe7269f95164ac83a04505869f96f5f83a:

  configure: add option to disable xnvme build (2022-06-22 11:45:32 -0600)

----------------------------------------------------------------
Ankit Kumar (1):
      configure: add option to disable xnvme build

 configure | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 510af424..04a1d0e2 100755
--- a/configure
+++ b/configure
@@ -171,7 +171,7 @@ march_set="no"
 libiscsi="no"
 libnbd="no"
 libnfs="no"
-xnvme="no"
+xnvme=""
 libzbc=""
 dfs=""
 dynamic_engines="no"
@@ -241,7 +241,7 @@ for opt do
   ;;
   --disable-libzbc) libzbc="no"
   ;;
-  --enable-xnvme) xnvme="yes"
+  --disable-xnvme) xnvme="no"
   ;;
   --disable-tcmalloc) disable_tcmalloc="yes"
   ;;
@@ -294,7 +294,7 @@ if test "$show_help" = "yes" ; then
   echo "--with-ime=             Install path for DDN's Infinite Memory Engine"
   echo "--enable-libiscsi       Enable iscsi support"
   echo "--enable-libnbd         Enable libnbd (NBD engine) support"
-  echo "--enable-xnvme          Enable xnvme support"
+  echo "--disable-xnvme         Disable xnvme support even if found"
   echo "--disable-libzbc        Disable libzbc even if found"
   echo "--disable-tcmalloc      Disable tcmalloc support"
   echo "--dynamic-libengines    Lib-based ioengines as dynamic libraries"
@@ -2619,7 +2619,7 @@ fi
 
 ##########################################
 # Check if we have xnvme
-if test "$xnvme" != "yes" ; then
+if test "$xnvme" != "no" ; then
   if check_min_lib_version xnvme 0.2.0; then
     xnvme="yes"
     xnvme_cflags=$(pkg-config --cflags xnvme)

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-20 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-20 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit e4d384755e4831cf5bbaa97e0c5b79a3598efbc4:

  Merge branch 'master' of https://github.com/useche/fio (2022-06-15 18:38:41 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d4bf5e6193b97c5e5490fdb93b069d149a38777c:

  gettime: fix whitespace damage (2022-06-19 12:04:19 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      gettime: fix whitespace damage

 gettime.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

---

Diff of recent changes:

diff --git a/gettime.c b/gettime.c
index 099e9d9f..14462420 100644
--- a/gettime.c
+++ b/gettime.c
@@ -431,22 +431,22 @@ void fio_clock_init(void)
 
 uint64_t ntime_since(const struct timespec *s, const struct timespec *e)
 {
-       int64_t sec, nsec;
+	int64_t sec, nsec;
 
-       sec = e->tv_sec - s->tv_sec;
-       nsec = e->tv_nsec - s->tv_nsec;
-       if (sec > 0 && nsec < 0) {
-	       sec--;
-	       nsec += 1000000000LL;
-       }
+	sec = e->tv_sec - s->tv_sec;
+	nsec = e->tv_nsec - s->tv_nsec;
+	if (sec > 0 && nsec < 0) {
+		sec--;
+		nsec += 1000000000LL;
+	}
 
        /*
 	* time warp bug on some kernels?
 	*/
-       if (sec < 0 || (sec == 0 && nsec < 0))
-	       return 0;
+	if (sec < 0 || (sec == 0 && nsec < 0))
+		return 0;
 
-       return nsec + (sec * 1000000000LL);
+	return nsec + (sec * 1000000000LL);
 }
 
 uint64_t ntime_since_now(const struct timespec *s)

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-16 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-16 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit b5f3adf9e1e40c7bdb76a9e433aa580f7eead740:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-06-13 18:14:26 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to e4d384755e4831cf5bbaa97e0c5b79a3598efbc4:

  Merge branch 'master' of https://github.com/useche/fio (2022-06-15 18:38:41 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'master' of https://github.com/useche/fio

Luis Useche (1):
      Init file_cache to invalid (maj, min)

Vincent Fu (5):
      ioengines: add helper for trims with async ioengines
      ioengines: don't record issue_time if ioengines already do it
      HOWTO: improve description of latency measures
      ioengines: update last_issue if we set issue_time
      ioengines: clean up latency accounting for 3 ioengines

 HOWTO.rst               | 29 ++++++++++++++++++-----------
 blktrace.c              |  5 ++++-
 engines/io_uring.c      | 13 +++++++++++--
 engines/libaio.c        |  9 ++++++++-
 engines/librpma_apm.c   |  2 +-
 engines/librpma_fio.c   |  9 ++++++++-
 engines/librpma_gpspm.c |  2 +-
 engines/rdma.c          |  9 ++++++++-
 ioengines.c             | 44 ++++++++++++++++++++++++++------------------
 ioengines.h             |  2 ++
 10 files changed, 87 insertions(+), 37 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 28ac2b7c..470777e2 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -4165,24 +4165,31 @@ writes in the example above).  In the order listed, they denote:
 **slat**
 		Submission latency (**min** being the minimum, **max** being the
 		maximum, **avg** being the average, **stdev** being the standard
-		deviation).  This is the time it took to submit the I/O.  For
-		sync I/O this row is not displayed as the slat is really the
-		completion latency (since queue/complete is one operation there).
-		This value can be in nanoseconds, microseconds or milliseconds ---
-		fio will choose the most appropriate base and print that (in the
-		example above nanoseconds was the best scale).  Note: in :option:`--minimal` mode
-		latencies are always expressed in microseconds.
+                deviation).  This is the time from when fio initialized the I/O
+                to submission.  For synchronous ioengines this includes the time
+                up until just before the ioengine's queue function is called.
+                For asynchronous ioengines this includes the time up through the
+                completion of the ioengine's queue function (and commit function
+                if it is defined). For sync I/O this row is not displayed as the
+                slat is negligible.  This value can be in nanoseconds,
+                microseconds or milliseconds --- fio will choose the most
+                appropriate base and print that (in the example above
+                nanoseconds was the best scale).  Note: in :option:`--minimal`
+                mode latencies are always expressed in microseconds.
 
 **clat**
 		Completion latency. Same names as slat, this denotes the time from
-		submission to completion of the I/O pieces. For sync I/O, clat will
-		usually be equal (or very close) to 0, as the time from submit to
-		complete is basically just CPU time (I/O has already been done, see slat
-		explanation).
+                submission to completion of the I/O pieces. For sync I/O, this
+                represents the time from when the I/O was submitted to the
+                operating system to when it was completed. For asynchronous
+                ioengines this is the time from when the ioengine's queue (and
+                commit if available) functions were completed to when the I/O's
+                completion was reaped by fio.
 
 **lat**
 		Total latency. Same names as slat and clat, this denotes the time from
 		when fio created the I/O unit to completion of the I/O operation.
+                It is the sum of submission and completion latency.
 
 **bw**
 		Bandwidth statistics based on samples. Same names as the xlat stats,
diff --git a/blktrace.c b/blktrace.c
index 619121c7..00e5f9a9 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -442,7 +442,10 @@ err:
 bool read_blktrace(struct thread_data* td)
 {
 	struct blk_io_trace t;
-	struct file_cache cache = { };
+	struct file_cache cache = {
+		.maj = ~0U,
+		.min = ~0U,
+	};
 	unsigned long ios[DDIR_RWDIR_SYNC_CNT] = { };
 	unsigned long long rw_bs[DDIR_RWDIR_CNT] = { };
 	unsigned long skipped_writes;
diff --git a/engines/io_uring.c b/engines/io_uring.c
index cceafe69..cffc7371 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -608,6 +608,12 @@ static void fio_ioring_queued(struct thread_data *td, int start, int nr)
 
 		start++;
 	}
+
+	/*
+	 * only used for iolog
+	 */
+	if (td->o.read_iolog_file)
+		memcpy(&td->last_issue, &now, sizeof(now));
 }
 
 static int fio_ioring_commit(struct thread_data *td)
@@ -1191,7 +1197,8 @@ static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td,
 static struct ioengine_ops ioengine_uring = {
 	.name			= "io_uring",
 	.version		= FIO_IOOPS_VERSION,
-	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD,
+	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
+					FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.init			= fio_ioring_init,
 	.post_init		= fio_ioring_post_init,
 	.io_u_init		= fio_ioring_io_u_init,
@@ -1211,7 +1218,9 @@ static struct ioengine_ops ioengine_uring = {
 static struct ioengine_ops ioengine_uring_cmd = {
 	.name			= "io_uring_cmd",
 	.version		= FIO_IOOPS_VERSION,
-	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO,
+	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
+					FIO_MEMALIGN | FIO_RAWIO |
+					FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.init			= fio_ioring_init,
 	.post_init		= fio_ioring_cmd_post_init,
 	.io_u_init		= fio_ioring_io_u_init,
diff --git a/engines/libaio.c b/engines/libaio.c
index 9c278d06..33b8c12f 100644
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -368,6 +368,12 @@ static void fio_libaio_queued(struct thread_data *td, struct io_u **io_us,
 		memcpy(&io_u->issue_time, &now, sizeof(now));
 		io_u_queued(td, io_u);
 	}
+
+	/*
+	 * only used for iolog
+	 */
+	if (td->o.read_iolog_file)
+		memcpy(&td->last_issue, &now, sizeof(now));
 }
 
 static int fio_libaio_commit(struct thread_data *td)
@@ -511,7 +517,8 @@ static int fio_libaio_init(struct thread_data *td)
 FIO_STATIC struct ioengine_ops ioengine = {
 	.name			= "libaio",
 	.version		= FIO_IOOPS_VERSION,
-	.flags			= FIO_ASYNCIO_SYNC_TRIM,
+	.flags			= FIO_ASYNCIO_SYNC_TRIM |
+					FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.init			= fio_libaio_init,
 	.post_init		= fio_libaio_post_init,
 	.prep			= fio_libaio_prep,
diff --git a/engines/librpma_apm.c b/engines/librpma_apm.c
index d1166ad8..896240dd 100644
--- a/engines/librpma_apm.c
+++ b/engines/librpma_apm.c
@@ -208,7 +208,7 @@ FIO_STATIC struct ioengine_ops ioengine_client = {
 	.errdetails		= librpma_fio_client_errdetails,
 	.close_file		= librpma_fio_file_nop,
 	.cleanup		= client_cleanup,
-	.flags			= FIO_DISKLESSIO,
+	.flags			= FIO_DISKLESSIO | FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.options		= librpma_fio_options,
 	.option_struct_size	= sizeof(struct librpma_fio_options_values),
 };
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index 34818904..a78a1e57 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -621,9 +621,16 @@ int librpma_fio_client_commit(struct thread_data *td)
 		}
 	}
 
-	if ((fill_time = fio_fill_issue_time(td)))
+	if ((fill_time = fio_fill_issue_time(td))) {
 		fio_gettime(&now, NULL);
 
+		/*
+		 * only used for iolog
+		 */
+		if (td->o.read_iolog_file)
+			memcpy(&td->last_issue, &now, sizeof(now));
+
+	}
 	/* move executed io_us from queued[] to flight[] */
 	for (i = 0; i < ccd->io_u_queued_nr; i++) {
 		struct io_u *io_u = ccd->io_us_queued[i];
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index 5cf97472..f00717a7 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -352,7 +352,7 @@ FIO_STATIC struct ioengine_ops ioengine_client = {
 	.errdetails		= librpma_fio_client_errdetails,
 	.close_file		= librpma_fio_file_nop,
 	.cleanup		= client_cleanup,
-	.flags			= FIO_DISKLESSIO,
+	.flags			= FIO_DISKLESSIO | FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.options		= librpma_fio_options,
 	.option_struct_size	= sizeof(struct librpma_fio_options_values),
 };
diff --git a/engines/rdma.c b/engines/rdma.c
index 4eb86652..e3bb2567 100644
--- a/engines/rdma.c
+++ b/engines/rdma.c
@@ -832,6 +832,12 @@ static void fio_rdmaio_queued(struct thread_data *td, struct io_u **io_us,
 		memcpy(&io_u->issue_time, &now, sizeof(now));
 		io_u_queued(td, io_u);
 	}
+
+	/*
+	 * only used for iolog
+	 */
+	if (td->o.read_iolog_file)
+		memcpy(&td->last_issue, &now, sizeof(now));
 }
 
 static int fio_rdmaio_commit(struct thread_data *td)
@@ -1404,7 +1410,8 @@ FIO_STATIC struct ioengine_ops ioengine = {
 	.cleanup		= fio_rdmaio_cleanup,
 	.open_file		= fio_rdmaio_open_file,
 	.close_file		= fio_rdmaio_close_file,
-	.flags			= FIO_DISKLESSIO | FIO_UNIDIR | FIO_PIPEIO,
+	.flags			= FIO_DISKLESSIO | FIO_UNIDIR | FIO_PIPEIO |
+					FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.options		= options,
 	.option_struct_size	= sizeof(struct rdmaio_options),
 };
diff --git a/ioengines.c b/ioengines.c
index 68f307e5..e2316ee4 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -24,6 +24,13 @@
 
 static FLIST_HEAD(engine_list);
 
+static inline bool async_ioengine_sync_trim(struct thread_data *td,
+					    struct io_u	*io_u)
+{
+	return td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) &&
+		io_u->ddir == DDIR_TRIM;
+}
+
 static bool check_engine_ops(struct thread_data *td, struct ioengine_ops *ops)
 {
 	if (ops->version != FIO_IOOPS_VERSION) {
@@ -350,17 +357,17 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 	io_u->resid = 0;
 
 	if (td_ioengine_flagged(td, FIO_SYNCIO) ||
-		(td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) && 
-		io_u->ddir == DDIR_TRIM)) {
-		if (fio_fill_issue_time(td))
+		async_ioengine_sync_trim(td, io_u)) {
+		if (fio_fill_issue_time(td)) {
 			fio_gettime(&io_u->issue_time, NULL);
 
-		/*
-		 * only used for iolog
-		 */
-		if (td->o.read_iolog_file)
-			memcpy(&td->last_issue, &io_u->issue_time,
-					sizeof(io_u->issue_time));
+			/*
+			 * only used for iolog
+			 */
+			if (td->o.read_iolog_file)
+				memcpy(&td->last_issue, &io_u->issue_time,
+						sizeof(io_u->issue_time));
+		}
 	}
 
 
@@ -435,17 +442,18 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 	}
 
 	if (!td_ioengine_flagged(td, FIO_SYNCIO) &&
-		(!td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) ||
-		 io_u->ddir != DDIR_TRIM)) {
-		if (fio_fill_issue_time(td))
+		!async_ioengine_sync_trim(td, io_u)) {
+		if (fio_fill_issue_time(td) &&
+			!td_ioengine_flagged(td, FIO_ASYNCIO_SETS_ISSUE_TIME)) {
 			fio_gettime(&io_u->issue_time, NULL);
 
-		/*
-		 * only used for iolog
-		 */
-		if (td->o.read_iolog_file)
-			memcpy(&td->last_issue, &io_u->issue_time,
-					sizeof(io_u->issue_time));
+			/*
+			 * only used for iolog
+			 */
+			if (td->o.read_iolog_file)
+				memcpy(&td->last_issue, &io_u->issue_time,
+						sizeof(io_u->issue_time));
+		}
 	}
 
 	return ret;
diff --git a/ioengines.h b/ioengines.h
index acdb0071..fafa1e48 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -83,6 +83,8 @@ enum fio_ioengine_flags {
 	FIO_ASYNCIO_SYNC_TRIM
 			= 1 << 14,	/* io engine has async ->queue except for trim */
 	FIO_NO_OFFLOAD	= 1 << 15,	/* no async offload */
+	FIO_ASYNCIO_SETS_ISSUE_TIME
+			= 1 << 16,	/* async ioengine with commit function that sets issue_time */
 };
 
 /*

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-14 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-14 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 26faead0f3c6e7608b89a51373f1455b91377fcb:

  t/zbd: skip test case #13 when max_open_zones is too small (2022-06-02 03:58:31 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to b5f3adf9e1e40c7bdb76a9e433aa580f7eead740:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-06-13 18:14:26 -0600)

----------------------------------------------------------------
Bart Van Assche (2):
      configure: Support gcc 12
      configure: Fix libzbc detection on SUSE Linux

Jens Axboe (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 configure | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 8182322b..510af424 100755
--- a/configure
+++ b/configure
@@ -1128,7 +1128,8 @@ cat > $TMPC << EOF
 #include <sched.h>
 int main(int argc, char **argv)
 {
-  cpu_set_t mask;
+  cpu_set_t mask = { };
+
   return sched_setaffinity(0, sizeof(mask), &mask);
 }
 EOF
@@ -1139,7 +1140,8 @@ else
 #include <sched.h>
 int main(int argc, char **argv)
 {
-  cpu_set_t mask;
+  cpu_set_t mask = { };
+
   return sched_setaffinity(0, &mask);
 }
 EOF
@@ -1621,7 +1623,8 @@ cat > $TMPC << EOF
 #include <sched.h>
 int main(int argc, char **argv)
 {
-  struct sched_param p;
+  struct sched_param p = { };
+
   return sched_setscheduler(0, SCHED_IDLE, &p);
 }
 EOF
@@ -1743,7 +1746,9 @@ cat > $TMPC << EOF
 #include <sys/uio.h>
 int main(int argc, char **argv)
 {
-  return pwritev(0, NULL, 1, 0) + preadv(0, NULL, 1, 0);
+  struct iovec iov[1] = { };
+
+  return pwritev(0, iov, 1, 0) + preadv(0, iov, 1, 0);
 }
 EOF
 if compile_prog "" "" "pwritev"; then
@@ -1761,7 +1766,9 @@ cat > $TMPC << EOF
 #include <sys/uio.h>
 int main(int argc, char **argv)
 {
-  return pwritev2(0, NULL, 1, 0, 0) + preadv2(0, NULL, 1, 0, 0);
+  struct iovec iov[1] = { };
+
+  return pwritev2(0, iov, 1, 0, 0) + preadv2(0, iov, 1, 0, 0);
 }
 EOF
 if compile_prog "" "" "pwritev2"; then
@@ -1787,14 +1794,14 @@ cat > $TMPC << EOF
 #include <stdio.h>
 int main(int argc, char **argv)
 {
-  struct addrinfo hints;
-  struct in6_addr addr;
+  struct addrinfo hints = { };
+  struct in6_addr addr = in6addr_any;
   int ret;
 
   ret = getaddrinfo(NULL, NULL, &hints, NULL);
   freeaddrinfo(NULL);
-  printf("%s\n", gai_strerror(ret));
-  addr = in6addr_any;
+  printf("%s %d\n", gai_strerror(ret), addr.s6_addr[0]);
+
   return 0;
 }
 EOF
@@ -2155,9 +2162,7 @@ cat > $TMPC << EOF
 #include <stdlib.h>
 int main(int argc, char **argv)
 {
-  int rc;
-  rc = pmem_is_pmem(NULL, 0);
-  return 0;
+  return pmem_is_pmem(NULL, 0);
 }
 EOF
 if compile_prog "" "-lpmem" "libpmem"; then
@@ -2176,7 +2181,7 @@ if test "$libpmem" = "yes"; then
 #include <stdlib.h>
 int main(int argc, char **argv)
 {
-  pmem_memcpy(NULL, NULL, NULL, NULL);
+  pmem_memcpy(NULL, NULL, 0, 0);
   return 0;
 }
 EOF
@@ -2392,7 +2397,7 @@ int main(int argc, char **argv)
   FILE *mtab = setmntent(NULL, "r");
   struct mntent *mnt = getmntent(mtab);
   endmntent(mtab);
-  return 0;
+  return mnt != NULL;
 }
 EOF
 if compile_prog "" "" "getmntent"; then
@@ -2573,6 +2578,10 @@ int main(int argc, char **argv)
 }
 EOF
 if test "$libzbc" != "no" ; then
+  if [ -e /usr/include/libzbc/libzbc ]; then
+    # SUSE Linux.
+    CFLAGS="$CFLAGS -I/usr/include/libzbc"
+  fi
   if compile_prog "" "-lzbc" "libzbc"; then
     libzbc="yes"
     if ! check_min_lib_version libzbc 5; then

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-02 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-02 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 5ceed0be62f3ce8903d5747674f9f70f44e736d6:

  docs: update language setting for Sphinx build (2022-05-31 20:58:00 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 26faead0f3c6e7608b89a51373f1455b91377fcb:

  t/zbd: skip test case #13 when max_open_zones is too small (2022-06-02 03:58:31 -0600)

----------------------------------------------------------------
Ankit Kumar (5):
      configure: check nvme uring command support
      nvme: add nvme opcodes, structures and helper functions
      docs: document options for io_uring_cmd I/O engine
      zbd: Check for direct flag only if its block device
      engines/io_uring: Enable zone device support for io_uring_cmd I/O engine

Anuj Gupta (4):
      io_uring.h: add IORING_SETUP_SQE128 and IORING_SETUP_CQE32
      init: return error incase an invalid value is passed as option
      engines/io_uring: add new I/O engine for uring passthrough support
      examples: add 2 example job file for io_uring_cmd engine

Jens Axboe (3):
      engines/io_uring: cleanup supported case
      engines/nvme: fix 'fd' leak in error handling
      engines/nvme: ioctl return value is an int

Shin'ichiro Kawasaki (1):
      t/zbd: skip test case #13 when max_open_zones is too small

 HOWTO.rst                    |  41 +++--
 Makefile                     |   4 +-
 configure                    |  21 +++
 engines/io_uring.c           | 346 +++++++++++++++++++++++++++++++++++++++++-
 engines/nvme.c               | 347 +++++++++++++++++++++++++++++++++++++++++++
 engines/nvme.h               | 214 ++++++++++++++++++++++++++
 examples/uring-cmd-ng.fio    |  25 ++++
 examples/uring-cmd-zoned.fio |  31 ++++
 file.h                       |  12 +-
 fio.1                        |  33 +++-
 init.c                       |   9 ++
 os/linux/io_uring.h          |  45 +++++-
 t/zbd/test-zbd-support       |  23 ++-
 zbd.c                        |   4 +-
 14 files changed, 1123 insertions(+), 32 deletions(-)
 create mode 100644 engines/nvme.c
 create mode 100644 engines/nvme.h
 create mode 100644 examples/uring-cmd-ng.fio
 create mode 100644 examples/uring-cmd-zoned.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 8ab3ac4b..28ac2b7c 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1952,6 +1952,10 @@ I/O engine
 			for both direct and buffered IO.
 			This engine defines engine specific options.
 
+		**io_uring_cmd**
+			Fast Linux native asynchronous I/O for pass through commands.
+			This engine defines engine specific options.
+
 		**libaio**
 			Linux native asynchronous I/O. Note that Linux may only support
 			queued behavior with non-buffered I/O (set ``direct=1`` or
@@ -2255,22 +2259,34 @@ with the caveat that when used on the command line, they must come after the
 	values for trim IOs are ignored. This option is mutually exclusive with
 	the :option:`cmdprio_percentage` option.
 
-.. option:: fixedbufs : [io_uring]
+.. option:: fixedbufs : [io_uring] [io_uring_cmd]
+
+	If fio is asked to do direct IO, then Linux will map pages for each
+	IO call, and release them when IO is done. If this option is set, the
+	pages are pre-mapped before IO is started. This eliminates the need to
+	map and release for each IO. This is more efficient, and reduces the
+	IO latency as well.
+
+.. option:: nonvectored : [io_uring] [io_uring_cmd]
 
-    If fio is asked to do direct IO, then Linux will map pages for each
-    IO call, and release them when IO is done. If this option is set, the
-    pages are pre-mapped before IO is started. This eliminates the need to
-    map and release for each IO. This is more efficient, and reduces the
-    IO latency as well.
+	With this option, fio will use non-vectored read/write commands, where
+	address must contain the address directly. Default is -1.
 
-.. option:: registerfiles : [io_uring]
+.. option:: force_async=int : [io_uring] [io_uring_cmd]
+
+	Normal operation for io_uring is to try and issue an sqe as
+	non-blocking first, and if that fails, execute it in an async manner.
+	With this option set to N, then every N request fio will ask sqe to
+	be issued in an async manner. Default is 0.
+
+.. option:: registerfiles : [io_uring] [io_uring_cmd]
 
 	With this option, fio registers the set of files being used with the
 	kernel. This avoids the overhead of managing file counts in the kernel,
 	making the submission and completion part more lightweight. Required
 	for the below :option:`sqthread_poll` option.
 
-.. option:: sqthread_poll : [io_uring] [xnvme]
+.. option:: sqthread_poll : [io_uring] [io_uring_cmd] [xnvme]
 
 	Normally fio will submit IO by issuing a system call to notify the
 	kernel of available items in the SQ ring. If this option is set, the
@@ -2278,14 +2294,19 @@ with the caveat that when used on the command line, they must come after the
 	This frees up cycles for fio, at the cost of using more CPU in the
 	system.
 
-.. option:: sqthread_poll_cpu : [io_uring]
+.. option:: sqthread_poll_cpu : [io_uring] [io_uring_cmd]
 
 	When :option:`sqthread_poll` is set, this option provides a way to
 	define which CPU should be used for the polling thread.
 
+.. option:: cmd_type=str : [io_uring_cmd]
+
+	Specifies the type of uring passthrough command to be used. Supported
+	value is nvme. Default is nvme.
+
 .. option:: hipri
 
-   [io_uring], [xnvme]
+   [io_uring] [io_uring_cmd] [xnvme]
 
         If this option is set, fio will attempt to use polled IO completions.
         Normal IO completions generate interrupts to signal the completion of
diff --git a/Makefile b/Makefile
index ed66305a..188a74d7 100644
--- a/Makefile
+++ b/Makefile
@@ -231,7 +231,7 @@ ifdef CONFIG_LIBXNVME
 endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \
-		oslib/linux-dev-lookup.c engines/io_uring.c
+		oslib/linux-dev-lookup.c engines/io_uring.c engines/nvme.c
   cmdprio_SRCS = engines/cmdprio.c
 ifdef CONFIG_HAS_BLKZONED
   SOURCE += oslib/linux-blkzoned.c
@@ -241,7 +241,7 @@ endif
 endif
 ifeq ($(CONFIG_TARGET_OS), Android)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c profiles/tiobench.c \
-		oslib/linux-dev-lookup.c engines/io_uring.c
+		oslib/linux-dev-lookup.c engines/io_uring.c engines/nvme.c
   cmdprio_SRCS = engines/cmdprio.c
 ifdef CONFIG_HAS_BLKZONED
   SOURCE += oslib/linux-blkzoned.c
diff --git a/configure b/configure
index 4ee536a0..8182322b 100755
--- a/configure
+++ b/configure
@@ -2587,6 +2587,27 @@ if test "$libzbc" != "no" ; then
 fi
 print_config "libzbc engine" "$libzbc"
 
+if test "$targetos" = "Linux" ; then
+##########################################
+# Check NVME_URING_CMD support
+cat > $TMPC << EOF
+#include <linux/nvme_ioctl.h>
+int main(void)
+{
+  struct nvme_uring_cmd *cmd;
+
+  return sizeof(struct nvme_uring_cmd);
+}
+EOF
+if compile_prog "" "" "nvme uring cmd"; then
+  output_sym "CONFIG_NVME_URING_CMD"
+  nvme_uring_cmd="yes"
+else
+  nvme_uring_cmd="no"
+fi
+print_config "NVMe uring command support" "$nvme_uring_cmd"
+fi
+
 ##########################################
 # Check if we have xnvme
 if test "$xnvme" != "yes" ; then
diff --git a/engines/io_uring.c b/engines/io_uring.c
index 1e15647e..cceafe69 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -24,6 +24,13 @@
 #include "../lib/types.h"
 #include "../os/linux/io_uring.h"
 #include "cmdprio.h"
+#include "nvme.h"
+
+#include <sys/stat.h>
+
+enum uring_cmd_type {
+	FIO_URING_CMD_NVME = 1,
+};
 
 struct io_sq_ring {
 	unsigned *head;
@@ -85,6 +92,7 @@ struct ioring_options {
 	unsigned int uncached;
 	unsigned int nowait;
 	unsigned int force_async;
+	enum uring_cmd_type cmd_type;
 };
 
 static const int ddir_to_op[2][2] = {
@@ -270,6 +278,22 @@ static struct fio_option options[] = {
 		.category = FIO_OPT_C_ENGINE,
 		.group	= FIO_OPT_G_IOURING,
 	},
+	{
+		.name	= "cmd_type",
+		.lname	= "Uring cmd type",
+		.type	= FIO_OPT_STR,
+		.off1	= offsetof(struct ioring_options, cmd_type),
+		.help	= "Specify uring-cmd type",
+		.def	= "nvme",
+		.posval = {
+			  { .ival = "nvme",
+			    .oval = FIO_URING_CMD_NVME,
+			    .help = "Issue nvme-uring-cmd",
+			  },
+		},
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_IOURING,
+	},
 	{
 		.name	= NULL,
 	},
@@ -373,6 +397,48 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
 	return 0;
 }
 
+/*
+ * Prepare the sqe for io_u as an IORING_OP_URING_CMD request that carries an
+ * NVMe passthrough read/write command.
+ *
+ * Returns 0 on success, -EINVAL if cmd_type is not nvme, otherwise the
+ * return value of fio_nvme_uring_cmd_prep().
+ */
+static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+	struct fio_file *f = io_u->file;
+	struct nvme_uring_cmd *cmd;
+	struct io_uring_sqe *sqe;
+
+	/* only supports nvme_uring_cmd */
+	if (o->cmd_type != FIO_URING_CMD_NVME)
+		return -EINVAL;
+
+	/* Each entry spans two struct io_uring_sqe slots, hence index * 2. */
+	sqe = &ld->sqes[(io_u->index) << 1];
+
+	if (o->registerfiles) {
+		sqe->fd = f->engine_pos;
+		sqe->flags = IOSQE_FIXED_FILE;
+	} else {
+		sqe->fd = f->fd;
+		/*
+		 * The sqe slot is reused for every submission of this io_u.
+		 * Reset the flags so an IOSQE_ASYNC set by an earlier
+		 * force_async round does not stick to later requests.
+		 */
+		sqe->flags = 0;
+	}
+	sqe->rw_flags = 0;
+	if (!td->o.odirect && o->uncached)
+		sqe->rw_flags |= RWF_UNCACHED;
+	if (o->nowait)
+		sqe->rw_flags |= RWF_NOWAIT;
+
+	sqe->opcode = IORING_OP_URING_CMD;
+	sqe->user_data = (unsigned long) io_u;
+	if (o->nonvectored)
+		sqe->cmd_op = NVME_URING_CMD_IO;
+	else
+		sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+	/* Mark every force_async'th prepared request as IOSQE_ASYNC. */
+	if (o->force_async && ++ld->prepped == o->force_async) {
+		ld->prepped = 0;
+		sqe->flags |= IOSQE_ASYNC;
+	}
+
+	/* The NVMe command lives in the sqe's trailing cmd[] area. */
+	cmd = (struct nvme_uring_cmd *)sqe->cmd;
+	return fio_nvme_uring_cmd_prep(cmd, io_u,
+			o->nonvectored ? NULL : &ld->iovecs[io_u->index]);
+}
+
 static struct io_u *fio_ioring_event(struct thread_data *td, int event)
 {
 	struct ioring_data *ld = td->io_ops_data;
@@ -396,6 +462,29 @@ static struct io_u *fio_ioring_event(struct thread_data *td, int event)
 	return io_u;
 }
 
+/*
+ * Look up the cqe at ring position (event + cq_ring_off), masked by
+ * cq_ring_mask, and return the io_u stored in its user_data. The cqe
+ * result is copied, negated, into io_u->error (0 stays 0).
+ */
+static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+	struct io_uring_cqe *cqe;
+	struct io_u *io_u;
+	unsigned index;
+
+	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
+	/* nvme entries take two struct io_uring_cqe slots each */
+	if (o->cmd_type == FIO_URING_CMD_NVME)
+		index <<= 1;
+
+	cqe = &ld->cq_ring.cqes[index];
+	io_u = (struct io_u *) (uintptr_t) cqe->user_data;
+
+	if (cqe->res != 0)
+		io_u->error = -cqe->res;
+	else
+		io_u->error = 0;
+
+	return io_u;
+}
+
 static int fio_ioring_cqring_reap(struct thread_data *td, unsigned int events,
 				   unsigned int max)
 {
@@ -622,14 +711,22 @@ static int fio_ioring_mmap(struct ioring_data *ld, struct io_uring_params *p)
 	sring->array = ptr + p->sq_off.array;
 	ld->sq_ring_mask = *sring->ring_mask;
 
-	ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
+	if (p->flags & IORING_SETUP_SQE128)
+		ld->mmap[1].len = 2 * p->sq_entries * sizeof(struct io_uring_sqe);
+	else
+		ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
 	ld->sqes = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
 				MAP_SHARED | MAP_POPULATE, ld->ring_fd,
 				IORING_OFF_SQES);
 	ld->mmap[1].ptr = ld->sqes;
 
-	ld->mmap[2].len = p->cq_off.cqes +
-				p->cq_entries * sizeof(struct io_uring_cqe);
+	if (p->flags & IORING_SETUP_CQE32) {
+		ld->mmap[2].len = p->cq_off.cqes +
+					2 * p->cq_entries * sizeof(struct io_uring_cqe);
+	} else {
+		ld->mmap[2].len = p->cq_off.cqes +
+					p->cq_entries * sizeof(struct io_uring_cqe);
+	}
 	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
 			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
 			IORING_OFF_CQ_RING);
@@ -728,6 +825,61 @@ retry:
 	return fio_ioring_mmap(ld, &p);
 }
 
+/*
+ * Create the io_uring instance for the io_uring_cmd engine: translate the
+ * engine options into io_uring_params flags, call io_uring_setup, optionally
+ * register the I/O buffers, then map the rings.
+ *
+ * Returns the negative syscall result on setup/register failure, otherwise
+ * the return value of fio_ioring_mmap().
+ */
+static int fio_ioring_cmd_queue_init(struct thread_data *td)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+	int depth = td->o.iodepth;
+	struct io_uring_params p;
+	int ret;
+
+	memset(&p, 0, sizeof(p));
+
+	if (o->hipri)
+		p.flags |= IORING_SETUP_IOPOLL;
+	if (o->sqpoll_thread) {
+		p.flags |= IORING_SETUP_SQPOLL;
+		if (o->sqpoll_set) {
+			p.flags |= IORING_SETUP_SQ_AFF;
+			p.sq_thread_cpu = o->sqpoll_cpu;
+		}
+	}
+	/* nvme passthrough needs the big sqe/cqe ring layouts */
+	if (o->cmd_type == FIO_URING_CMD_NVME) {
+		p.flags |= IORING_SETUP_SQE128;
+		p.flags |= IORING_SETUP_CQE32;
+	}
+
+	/*
+	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
+	 * than that.
+	 */
+	p.flags |= IORING_SETUP_CQSIZE;
+	p.cq_entries = depth;
+
+retry:
+	ret = syscall(__NR_io_uring_setup, depth, &p);
+	if (ret < 0) {
+		/* On EINVAL, retry once without IORING_SETUP_CQSIZE. */
+		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
+			p.flags &= ~IORING_SETUP_CQSIZE;
+			goto retry;
+		}
+		return ret;
+	}
+
+	ld->ring_fd = ret;
+
+	fio_ioring_probe(td);
+
+	if (o->fixedbufs) {
+		/* ld->iovecs holds one entry per queue slot (see post_init) */
+		ret = syscall(__NR_io_uring_register, ld->ring_fd,
+				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
+		if (ret < 0)
+			return ret;
+	}
+
+	return fio_ioring_mmap(ld, &p);
+}
+
 static int fio_ioring_register_files(struct thread_data *td)
 {
 	struct ioring_data *ld = td->io_ops_data;
@@ -811,6 +963,52 @@ static int fio_ioring_post_init(struct thread_data *td)
 	return 0;
 }
 
+/*
+ * Second-stage init for the io_uring_cmd engine: point every iovec at its
+ * io_u buffer, create the ring, zero all sqes and, if requested, register
+ * the files with the ring.
+ *
+ * Returns 0 on success, 1 (after td_verror()) on failure.
+ */
+static int fio_ioring_cmd_post_init(struct thread_data *td)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+	struct io_u *io_u;
+	int err, i;
+
+	/* One iovec per queue slot, sized for the largest block size. */
+	for (i = 0; i < td->o.iodepth; i++) {
+		struct iovec *iov = &ld->iovecs[i];
+
+		io_u = ld->io_u_index[i];
+		iov->iov_base = io_u->buf;
+		iov->iov_len = td_max_bs(td);
+	}
+
+	err = fio_ioring_cmd_queue_init(td);
+	if (err) {
+		/* capture errno before td_verror() can disturb it */
+		int init_err = errno;
+
+		td_verror(td, init_err, "io_queue_init");
+		return 1;
+	}
+
+	for (i = 0; i < td->o.iodepth; i++) {
+		struct io_uring_sqe *sqe;
+
+		/* nvme entries occupy two struct io_uring_sqe slots each */
+		if (o->cmd_type == FIO_URING_CMD_NVME) {
+			sqe = &ld->sqes[i << 1];
+			memset(sqe, 0, 2 * sizeof(*sqe));
+		} else {
+			sqe = &ld->sqes[i];
+			memset(sqe, 0, sizeof(*sqe));
+		}
+	}
+
+	if (o->registerfiles) {
+		err = fio_ioring_register_files(td);
+		if (err) {
+			td_verror(td, errno, "ioring_register_files");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 static int fio_ioring_init(struct thread_data *td)
 {
 	struct ioring_options *o = td->eo;
@@ -868,6 +1066,38 @@ static int fio_ioring_open_file(struct thread_data *td, struct fio_file *f)
 	return 0;
 }
 
+/*
+ * Open hook for the io_uring_cmd engine. For cmd_type nvme, make sure the
+ * file carries a struct nvme_data (namespace id and LBA shift) as engine
+ * data, querying the device if none is attached yet. Then either open the
+ * file generically or, with registerfiles, reuse the registered fd.
+ *
+ * Returns 0 on success, the fio_nvme_get_info() error, -ENOMEM if the
+ * engine data cannot be allocated, or the generic_open_file() result.
+ */
+static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+
+	if (o->cmd_type == FIO_URING_CMD_NVME) {
+		struct nvme_data *data = NULL;
+		unsigned int nsid, lba_size = 0;
+		unsigned long long nlba = 0;
+		int ret;
+
+		/* Store the namespace-id and lba size. */
+		data = FILE_ENG_DATA(f);
+		if (data == NULL) {
+			ret = fio_nvme_get_info(f, &nsid, &lba_size, &nlba);
+			if (ret)
+				return ret;
+
+			data = calloc(1, sizeof(struct nvme_data));
+			/* Do not dereference a failed allocation. */
+			if (!data)
+				return -ENOMEM;
+			data->nsid = nsid;
+			data->lba_shift = ilog2(lba_size);
+
+			FILE_SET_ENG_DATA(f, data);
+		}
+	}
+	if (!ld || !o->registerfiles)
+		return generic_open_file(td, f);
+
+	f->fd = ld->fds[f->engine_pos];
+	return 0;
+}
+
 static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f)
 {
 	struct ioring_data *ld = td->io_ops_data;
@@ -880,7 +1110,85 @@ static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f)
 	return 0;
 }
 
-static struct ioengine_ops ioengine = {
+/*
+ * Close hook for the io_uring_cmd engine. For cmd_type nvme the per-file
+ * struct nvme_data is detached and freed. Files that are not registered
+ * with the ring are closed generically; registered ones only have f->fd
+ * reset to -1.
+ */
+static int fio_ioring_cmd_close_file(struct thread_data *td,
+				     struct fio_file *f)
+{
+	struct ioring_data *ld = td->io_ops_data;
+	struct ioring_options *o = td->eo;
+
+	if (o->cmd_type == FIO_URING_CMD_NVME) {
+		struct nvme_data *data = FILE_ENG_DATA(f);
+
+		/* clear the pointer before freeing so it cannot dangle */
+		FILE_SET_ENG_DATA(f, NULL);
+		free(data);
+	}
+	if (!ld || !o->registerfiles)
+		return generic_close_file(td, f);
+
+	f->fd = -1;
+	return 0;
+}
+
+/*
+ * Size hook for the io_uring_cmd engine. For cmd_type nvme the size is
+ * derived from the namespace (LBA size * number of LBAs) and a struct
+ * nvme_data with the namespace id and LBA shift is attached to the file.
+ * Other command types fall back to generic_get_file_size().
+ *
+ * Returns 0 on success (or if the size is already known), the
+ * fio_nvme_get_info() error, or -ENOMEM if the engine data cannot be
+ * allocated.
+ */
+static int fio_ioring_cmd_get_file_size(struct thread_data *td,
+					struct fio_file *f)
+{
+	struct ioring_options *o = td->eo;
+
+	if (fio_file_size_known(f))
+		return 0;
+
+	if (o->cmd_type == FIO_URING_CMD_NVME) {
+		struct nvme_data *data = NULL;
+		unsigned int nsid, lba_size = 0;
+		unsigned long long nlba = 0;
+		int ret;
+
+		ret = fio_nvme_get_info(f, &nsid, &lba_size, &nlba);
+		if (ret)
+			return ret;
+
+		data = calloc(1, sizeof(struct nvme_data));
+		/* Do not dereference a failed allocation. */
+		if (!data)
+			return -ENOMEM;
+		data->nsid = nsid;
+		data->lba_shift = ilog2(lba_size);
+
+		f->real_file_size = lba_size * nlba;
+		fio_file_set_size_known(f);
+
+		FILE_SET_ENG_DATA(f, data);
+		return 0;
+	}
+	return generic_get_file_size(td, f);
+}
+
+/* Engine hook: forwards unchanged to fio_nvme_get_zoned_model(). */
+static int fio_ioring_cmd_get_zoned_model(struct thread_data *td,
+					  struct fio_file *f,
+					  enum zbd_zoned_model *model)
+{
+	return fio_nvme_get_zoned_model(td, f, model);
+}
+
+/* Engine hook: forwards unchanged to fio_nvme_report_zones(). */
+static int fio_ioring_cmd_report_zones(struct thread_data *td,
+				       struct fio_file *f, uint64_t offset,
+				       struct zbd_zone *zbdz,
+				       unsigned int nr_zones)
+{
+	return fio_nvme_report_zones(td, f, offset, zbdz, nr_zones);
+}
+
+/* Engine hook: forwards unchanged to fio_nvme_reset_wp(). */
+static int fio_ioring_cmd_reset_wp(struct thread_data *td, struct fio_file *f,
+				   uint64_t offset, uint64_t length)
+{
+	return fio_nvme_reset_wp(td, f, offset, length);
+}
+
+/* Engine hook: forwards unchanged to fio_nvme_get_max_open_zones(). */
+static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td,
+					     struct fio_file *f,
+					     unsigned int *max_open_zones)
+{
+	return fio_nvme_get_max_open_zones(td, f, max_open_zones);
+}
+
+static struct ioengine_ops ioengine_uring = {
 	.name			= "io_uring",
 	.version		= FIO_IOOPS_VERSION,
 	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD,
@@ -900,13 +1208,39 @@ static struct ioengine_ops ioengine = {
 	.option_struct_size	= sizeof(struct ioring_options),
 };
 
+static struct ioengine_ops ioengine_uring_cmd = {
+	.name			= "io_uring_cmd",
+	.version		= FIO_IOOPS_VERSION,
+	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO,
+	.init			= fio_ioring_init,
+	.post_init		= fio_ioring_cmd_post_init,
+	.io_u_init		= fio_ioring_io_u_init,
+	.prep			= fio_ioring_cmd_prep,
+	.queue			= fio_ioring_queue,
+	.commit			= fio_ioring_commit,
+	.getevents		= fio_ioring_getevents,
+	.event			= fio_ioring_cmd_event,
+	.cleanup		= fio_ioring_cleanup,
+	.open_file		= fio_ioring_cmd_open_file,
+	.close_file		= fio_ioring_cmd_close_file,
+	.get_file_size		= fio_ioring_cmd_get_file_size,
+	.get_zoned_model	= fio_ioring_cmd_get_zoned_model,
+	.report_zones		= fio_ioring_cmd_report_zones,
+	.reset_wp		= fio_ioring_cmd_reset_wp,
+	.get_max_open_zones	= fio_ioring_cmd_get_max_open_zones,
+	.options		= options,
+	.option_struct_size	= sizeof(struct ioring_options),
+};
+
 static void fio_init fio_ioring_register(void)
 {
-	register_ioengine(&ioengine);
+	register_ioengine(&ioengine_uring);
+	register_ioengine(&ioengine_uring_cmd);
 }
 
 static void fio_exit fio_ioring_unregister(void)
 {
-	unregister_ioengine(&ioengine);
+	unregister_ioengine(&ioengine_uring);
+	unregister_ioengine(&ioengine_uring_cmd);
 }
 #endif
diff --git a/engines/nvme.c b/engines/nvme.c
new file mode 100644
index 00000000..9ffc5303
--- /dev/null
+++ b/engines/nvme.c
@@ -0,0 +1,347 @@
+/*
+ * nvme structure declarations and helper functions for the
+ * io_uring_cmd engine.
+ */
+
+#include "nvme.h"
+
+/*
+ * Build the NVMe read/write passthrough command for io_u in *cmd.
+ *
+ * cmd:  command area to fill; it is zeroed first.
+ * io_u: request; its file must carry a struct nvme_data as engine data.
+ * iov:  if non-NULL, the buffer is described through this iovec and
+ *       cmd->addr points at it; if NULL, cmd->addr is the buffer itself.
+ *
+ * Returns 0 on success, -ENOTSUP for anything but DDIR_READ/DDIR_WRITE.
+ */
+int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
+			    struct iovec *iov)
+{
+	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
+	__u64 slba;
+	__u32 nlb;
+
+	memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+
+	if (io_u->ddir == DDIR_READ)
+		cmd->opcode = nvme_cmd_read;
+	else if (io_u->ddir == DDIR_WRITE)
+		cmd->opcode = nvme_cmd_write;
+	else
+		return -ENOTSUP;
+
+	/* Convert byte offset/length to LBA units; nlb is the count minus one. */
+	slba = io_u->offset >> data->lba_shift;
+	nlb = (io_u->xfer_buflen >> data->lba_shift) - 1;
+
+	/* cdw10 and cdw11 represent starting lba */
+	cmd->cdw10 = slba & 0xffffffff;
+	cmd->cdw11 = slba >> 32;
+	/* cdw12 represent number of lba's for read/write */
+	cmd->cdw12 = nlb;
+	if (iov) {
+		iov->iov_base = io_u->xfer_buf;
+		iov->iov_len = io_u->xfer_buflen;
+		cmd->addr = (__u64)(uintptr_t)iov;
+		/* presumably the iovec count for the vectored variant - confirm */
+		cmd->data_len = 1;
+	} else {
+		cmd->addr = (__u64)(uintptr_t)io_u->xfer_buf;
+		cmd->data_len = io_u->xfer_buflen;
+	}
+	cmd->nsid = data->nsid;
+	return 0;
+}
+
+/*
+ * Issue an Identify admin command through NVME_IOCTL_ADMIN_CMD.
+ *
+ * cns goes into cdw10 and csi, shifted by NVME_IDENTIFY_CSI_SHIFT, into
+ * cdw11. data must have room for NVME_IDENTIFY_DATA_SIZE bytes.
+ * Returns the ioctl() result.
+ */
+static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
+			 enum nvme_csi csi, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode         = nvme_admin_identify,
+		.nsid           = nsid,
+		.addr           = (__u64)(uintptr_t)data,
+		.data_len       = NVME_IDENTIFY_DATA_SIZE,
+		.cdw10          = cns,
+		.cdw11          = csi << NVME_IDENTIFY_CSI_SHIFT,
+		.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+	};
+
+	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+}
+
+/*
+ * Query an NVMe generic char device for its namespace id, LBA size in
+ * bytes and namespace size in LBAs.
+ *
+ * Returns 0 on success, 1 if the file is not a char device, a negative
+ * errno if open() or the namespace-id ioctl fails, or the nvme_identify()
+ * result if Identify Namespace fails. The outputs are only written on
+ * success.
+ */
+int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
+		      __u64 *nlba)
+{
+	struct nvme_id_ns ns;
+	int namespace_id;
+	int fd, err;
+
+	if (f->filetype != FIO_TYPE_CHAR) {
+		log_err("ioengine io_uring_cmd only works with nvme ns "
+			"generic char devices (/dev/ngXnY)\n");
+		return 1;
+	}
+
+	fd = open(f->file_name, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	namespace_id = ioctl(fd, NVME_IOCTL_ID);
+	if (namespace_id < 0) {
+		/* Save errno first: log_err() and close() may overwrite it. */
+		err = -errno;
+		log_err("failed to fetch namespace-id\n");
+		goto out;
+	}
+
+	/*
+	 * Identify namespace to get the namespace size in LBA's and the
+	 * LBA data size.
+	 */
+	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
+				NVME_CSI_NVM, &ns);
+	if (err) {
+		log_err("failed to fetch identify namespace\n");
+		goto out;
+	}
+
+	*nsid = namespace_id;
+	/* The low 4 bits of flbas index lbaf[]; ds is log2 of the LBA size. */
+	*lba_sz = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds;
+	*nlba = ns.nsze;
+out:
+	close(fd);
+	return err;
+}
+
+/*
+ * Determine the zoned model of an NVMe generic char device. The device is
+ * reported as ZBD_HOST_MANAGED only if both ZNS identify commands (I/O
+ * command set specific controller and namespace) succeed, otherwise as
+ * ZBD_NONE.
+ *
+ * Returns -EINVAL if the file is not a char device, a negative errno if it
+ * cannot be opened, and 0 otherwise (identify failures are not errors).
+ */
+int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
+			     enum zbd_zoned_model *model)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_id_ns ns;
+	int fd;
+
+	if (f->filetype != FIO_TYPE_CHAR)
+		return -EINVAL;
+
+	/* File is not yet opened */
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	/*
+	 * ns only serves as an NVME_IDENTIFY_DATA_SIZE scratch buffer for
+	 * both commands; its contents are not inspected.
+	 */
+	*model = ZBD_NONE;
+	if (nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_CTRL,
+				NVME_CSI_ZNS, &ns))
+		goto out;
+
+	if (nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &ns))
+		goto out;
+
+	*model = ZBD_HOST_MANAGED;
+out:
+	close(fd);
+	return 0;
+}
+
+/*
+ * Issue a Zone Management Receive (report zones) command through
+ * NVME_IOCTL_IO_CMD, starting at LBA slba and writing up to data_len bytes
+ * into data. zras_feat is OR'ed into cdw13 next to the report-zones action.
+ * Returns the ioctl() result.
+ */
+static int nvme_report_zones(int fd, __u32 nsid, __u64 slba, __u32 zras_feat,
+			     __u32 data_len, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode         = nvme_zns_cmd_mgmt_recv,
+		.nsid           = nsid,
+		.addr           = (__u64)(uintptr_t)data,
+		.data_len       = data_len,
+		.cdw10          = slba & 0xffffffff,
+		.cdw11          = slba >> 32,
+		/* buffer length in 4-byte units, minus one */
+		.cdw12		= (data_len >> 2) - 1,
+		.cdw13		= NVME_ZNS_ZRA_REPORT_ZONES | zras_feat,
+		.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+	};
+
+	return ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+}
+
+/*
+ * Fill zbdz[] with up to nr_zones zone descriptors, starting at byte
+ * offset 'offset'. Zones are fetched from the device in chunks of at most
+ * 1024 descriptors and converted from LBA to byte units.
+ *
+ * Returns the number of zones stored in zbdz[] on success. On failure
+ * returns a negative errno, or the non-zero result of the failing
+ * identify/report ioctl.
+ */
+int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
+			  uint64_t offset, struct zbd_zone *zbdz,
+			  unsigned int nr_zones)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_zone_report *zr;
+	struct nvme_zns_id_ns zns_ns;
+	struct nvme_id_ns ns;
+	unsigned int i = 0, j, zones_fetched = 0;
+	unsigned int max_zones, zones_chunks = 1024;
+	int fd, ret = 0;
+	__u32 zr_len;
+	__u64 zlen;
+
+	/* File is not yet opened */
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
+	zr = calloc(1, zr_len);
+	if (!zr) {
+		close(fd);
+		return -ENOMEM;
+	}
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_NS,
+				NVME_CSI_NVM, &ns);
+	if (ret) {
+		log_err("%s: nvme_identify_ns failed, err=%d\n", f->file_name,
+			ret);
+		goto out;
+	}
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &zns_ns);
+	if (ret) {
+		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
+			f->file_name, ret);
+		goto out;
+	}
+	/* Zone size in bytes for the LBA format currently in use. */
+	zlen = zns_ns.lbafe[ns.flbas & 0x0f].zsze << data->lba_shift;
+	if (!zlen) {
+		/* A zero zone size would divide by zero below. */
+		log_err("%s: device reports a zone size of 0\n", f->file_name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Never ask for more zones than fit between offset and end of file. */
+	max_zones = (f->real_file_size - offset) / zlen;
+	if (max_zones < nr_zones)
+		nr_zones = max_zones;
+
+	if (nr_zones < zones_chunks)
+		zones_chunks = nr_zones;
+
+	while (zones_fetched < nr_zones) {
+		/* Shrink the last chunk to exactly what is still missing. */
+		if (zones_fetched + zones_chunks >= nr_zones) {
+			zones_chunks = nr_zones - zones_fetched;
+			zr_len = sizeof(*zr) + (zones_chunks * sizeof(struct nvme_zns_desc));
+		}
+		ret = nvme_report_zones(fd, data->nsid, offset >> data->lba_shift,
+					NVME_ZNS_ZRAS_FEAT_ERZ, zr_len, (void *)zr);
+		if (ret) {
+			log_err("%s: nvme_zns_report_zones failed, err=%d\n",
+				f->file_name, ret);
+			goto out;
+		}
+
+		/*
+		 * An empty report makes no progress; stop instead of
+		 * looping forever and return what was fetched so far.
+		 */
+		if (!zr->nr_zones)
+			break;
+
+		/* Transform the zone-report */
+		for (j = 0; j < zr->nr_zones; j++, i++) {
+			struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(zr->entries[j]);
+
+			zbdz[i].start = desc->zslba << data->lba_shift;
+			zbdz[i].len = zlen;
+			zbdz[i].wp = desc->wp << data->lba_shift;
+			zbdz[i].capacity = desc->zcap << data->lba_shift;
+
+			/* Zone Type is stored in the low 4 bits of zt. */
+			switch (desc->zt & 0x0f) {
+			case NVME_ZONE_TYPE_SEQWRITE_REQ:
+				zbdz[i].type = ZBD_ZONE_TYPE_SWR;
+				break;
+			default:
+				log_err("%s: invalid type for zone at offset %llu.\n",
+					f->file_name, desc->zslba);
+				ret = -EIO;
+				goto out;
+			}
+
+			/* Zone State is stored in the high 4 bits of zs. */
+			switch (desc->zs >> 4) {
+			case NVME_ZNS_ZS_EMPTY:
+				zbdz[i].cond = ZBD_ZONE_COND_EMPTY;
+				break;
+			case NVME_ZNS_ZS_IMPL_OPEN:
+				zbdz[i].cond = ZBD_ZONE_COND_IMP_OPEN;
+				break;
+			case NVME_ZNS_ZS_EXPL_OPEN:
+				zbdz[i].cond = ZBD_ZONE_COND_EXP_OPEN;
+				break;
+			case NVME_ZNS_ZS_CLOSED:
+				zbdz[i].cond = ZBD_ZONE_COND_CLOSED;
+				break;
+			case NVME_ZNS_ZS_FULL:
+				zbdz[i].cond = ZBD_ZONE_COND_FULL;
+				break;
+			case NVME_ZNS_ZS_READ_ONLY:
+			case NVME_ZNS_ZS_OFFLINE:
+			default:
+				/* Treat all these conditions as offline (don't use!) */
+				zbdz[i].cond = ZBD_ZONE_COND_OFFLINE;
+				zbdz[i].wp = zbdz[i].start;
+			}
+		}
+		zones_fetched += zr->nr_zones;
+		offset += zr->nr_zones * zlen;
+	}
+
+	ret = zones_fetched;
+out:
+	free(zr);
+	close(fd);
+
+	return ret;
+}
+
+/*
+ * Reset the write pointer of every zone in [offset, offset + length),
+ * one Zone Management Send command per zone, stepping by td->o.zone_size.
+ * Uses f->fd if the file is open, otherwise opens it temporarily.
+ *
+ * Stops at the first command that fails. Returns a negative errno if the
+ * file cannot be opened, otherwise the negated result of the last ioctl()
+ * issued (0 if all resets succeeded).
+ */
+int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
+		      uint64_t offset, uint64_t length)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	unsigned int i, nr_zones;
+	unsigned long long zslba;
+	int fd, ret = 0;
+
+	/* If the file is not yet opened, open it for this function. */
+	fd = f->fd;
+	if (fd < 0) {
+		fd = open(f->file_name, O_RDWR | O_LARGEFILE);
+		if (fd < 0)
+			return -errno;
+	}
+
+	zslba = offset >> data->lba_shift;
+	/* Round up so a partially covered trailing zone is reset as well. */
+	nr_zones = (length + td->o.zone_size - 1) / td->o.zone_size;
+
+	for (i = 0; i < nr_zones; i++, zslba += (td->o.zone_size >> data->lba_shift)) {
+		struct nvme_passthru_cmd cmd = {
+			.opcode         = nvme_zns_cmd_mgmt_send,
+			.nsid           = data->nsid,
+			.cdw10          = zslba & 0xffffffff,
+			.cdw11          = zslba >> 32,
+			.cdw13          = NVME_ZNS_ZSA_RESET,
+			.addr           = (__u64)(uintptr_t)NULL,
+			.data_len       = 0,
+			.timeout_ms     = NVME_DEFAULT_IOCTL_TIMEOUT,
+		};
+
+		ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+		/* Do not let a later success hide this failure. */
+		if (ret)
+			break;
+	}
+
+	/* Only close the descriptor if it was opened here. */
+	if (f->fd < 0)
+		close(fd);
+	return -ret;
+}
+
+/*
+ * Read the ZNS identify-namespace data of the device and store mor + 1 in
+ * *max_open_zones.
+ *
+ * Returns 0 on success, a negative errno if the file cannot be opened, or
+ * the nvme_identify() result on failure (*max_open_zones is then left
+ * untouched).
+ */
+int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				unsigned int *max_open_zones)
+{
+	struct nvme_data *data = FILE_ENG_DATA(f);
+	struct nvme_zns_id_ns zns_ns;
+	int fd, ret = 0;
+
+	fd = open(f->file_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return -errno;
+
+	ret = nvme_identify(fd, data->nsid, NVME_IDENTIFY_CNS_CSI_NS,
+				NVME_CSI_ZNS, &zns_ns);
+	if (ret) {
+		log_err("%s: nvme_zns_identify_ns failed, err=%d\n",
+			f->file_name, ret);
+		goto out;
+	}
+
+	/* presumably mor is a 0's based value, hence the + 1 - confirm */
+	*max_open_zones = zns_ns.mor + 1;
+out:
+	close(fd);
+	return ret;
+}
diff --git a/engines/nvme.h b/engines/nvme.h
new file mode 100644
index 00000000..70a89b74
--- /dev/null
+++ b/engines/nvme.h
@@ -0,0 +1,214 @@
+/*
+ * nvme structure declarations and helper functions for the
+ * io_uring_cmd engine.
+ */
+
+#ifndef FIO_NVME_H
+#define FIO_NVME_H
+
+#include <linux/nvme_ioctl.h>
+#include "../fio.h"
+
+/*
+ * If the uapi headers installed on the system lack nvme uring command
+ * support, use the local version to prevent compilation issues.
+ */
+#ifndef CONFIG_NVME_URING_CMD
+struct nvme_uring_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
+	__u64	addr;
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32   rsvd2;
+};
+
+#define NVME_URING_CMD_IO	_IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC	_IOWR('N', 0x81, struct nvme_uring_cmd)
+#endif /* CONFIG_NVME_URING_CMD */
+
+#define NVME_DEFAULT_IOCTL_TIMEOUT 0
+#define NVME_IDENTIFY_DATA_SIZE 4096
+#define NVME_IDENTIFY_CSI_SHIFT 24
+
+#define NVME_ZNS_ZRA_REPORT_ZONES 0
+#define NVME_ZNS_ZRAS_FEAT_ERZ (1 << 16)
+#define NVME_ZNS_ZSA_RESET 0x4
+#define NVME_ZONE_TYPE_SEQWRITE_REQ 0x2
+
+enum nvme_identify_cns {
+	NVME_IDENTIFY_CNS_NS		= 0x00,
+	NVME_IDENTIFY_CNS_CSI_NS	= 0x05,
+	NVME_IDENTIFY_CNS_CSI_CTRL	= 0x06,
+};
+
+enum nvme_csi {
+	NVME_CSI_NVM			= 0,
+	NVME_CSI_KV			= 1,
+	NVME_CSI_ZNS			= 2,
+};
+
+enum nvme_admin_opcode {
+	nvme_admin_identify		= 0x06,
+};
+
+enum nvme_io_opcode {
+	nvme_cmd_write			= 0x01,
+	nvme_cmd_read			= 0x02,
+	nvme_zns_cmd_mgmt_send		= 0x79,
+	nvme_zns_cmd_mgmt_recv		= 0x7a,
+};
+
+enum nvme_zns_zs {
+	NVME_ZNS_ZS_EMPTY		= 0x1,
+	NVME_ZNS_ZS_IMPL_OPEN		= 0x2,
+	NVME_ZNS_ZS_EXPL_OPEN		= 0x3,
+	NVME_ZNS_ZS_CLOSED		= 0x4,
+	NVME_ZNS_ZS_READ_ONLY		= 0xd,
+	NVME_ZNS_ZS_FULL		= 0xe,
+	NVME_ZNS_ZS_OFFLINE		= 0xf,
+};
+
+struct nvme_data {
+	__u32 nsid;
+	__u32 lba_shift;
+};
+
+struct nvme_lbaf {
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ns {
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			nmic;
+	__u8			rescap;
+	__u8			fpi;
+	__u8			dlfeat;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__le16			nabsn;
+	__le16			nabo;
+	__le16			nabspf;
+	__le16			noiob;
+	__u8			nvmcap[16];
+	__le16			npwg;
+	__le16			npwa;
+	__le16			npdg;
+	__le16			npda;
+	__le16			nows;
+	__le16			mssrl;
+	__le32			mcl;
+	__u8			msrc;
+	__u8			rsvd81[11];
+	__le32			anagrpid;
+	__u8			rsvd96[3];
+	__u8			nsattr;
+	__le16			nvmsetid;
+	__le16			endgid;
+	__u8			nguid[16];
+	__u8			eui64[8];
+	struct nvme_lbaf	lbaf[16];
+	__u8			rsvd192[192];
+	__u8			vs[3712];
+};
+
+/*
+ * Integer base-2 logarithm: returns the index of the highest set bit of i,
+ * i.e. floor(log2(i)), or -1 when i is 0.
+ */
+static inline int ilog2(uint32_t i)
+{
+	int log = -1;
+
+	/* count how many right shifts it takes to clear i */
+	while (i) {
+		i >>= 1;
+		log++;
+	}
+	return log;
+}
+
+struct nvme_zns_lbafe {
+	__le64	zsze;
+	__u8	zdes;
+	__u8	rsvd9[7];
+};
+
+struct nvme_zns_id_ns {
+	__le16			zoc;
+	__le16			ozcs;
+	__le32			mar;
+	__le32			mor;
+	__le32			rrl;
+	__le32			frl;
+	__le32			rrl1;
+	__le32			rrl2;
+	__le32			rrl3;
+	__le32			frl1;
+	__le32			frl2;
+	__le32			frl3;
+	__le32			numzrwa;
+	__le16			zrwafg;
+	__le16			zrwasz;
+	__u8			zrwacap;
+	__u8			rsvd53[2763];
+	struct nvme_zns_lbafe	lbafe[64];
+	__u8			vs[256];
+};
+
+struct nvme_zns_desc {
+	__u8	zt;
+	__u8	zs;
+	__u8	za;
+	__u8	zai;
+	__u8	rsvd4[4];
+	__le64	zcap;
+	__le64	zslba;
+	__le64	wp;
+	__u8	rsvd32[32];
+};
+
+struct nvme_zone_report {
+	__le64			nr_zones;
+	__u8			rsvd8[56];
+	struct nvme_zns_desc	entries[];
+};
+
+int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
+		      __u64 *nlba);
+
+int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
+			    struct iovec *iov);
+
+int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
+			     enum zbd_zoned_model *model);
+
+int fio_nvme_report_zones(struct thread_data *td, struct fio_file *f,
+			  uint64_t offset, struct zbd_zone *zbdz,
+			  unsigned int nr_zones);
+
+int fio_nvme_reset_wp(struct thread_data *td, struct fio_file *f,
+		      uint64_t offset, uint64_t length);
+
+int fio_nvme_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				unsigned int *max_open_zones);
+
+#endif
diff --git a/examples/uring-cmd-ng.fio b/examples/uring-cmd-ng.fio
new file mode 100644
index 00000000..b2888a00
--- /dev/null
+++ b/examples/uring-cmd-ng.fio
@@ -0,0 +1,25 @@
+# io_uring_cmd I/O engine for nvme-ns generic character device
+
+[global]
+filename=/dev/ng0n1
+ioengine=io_uring_cmd
+cmd_type=nvme
+size=1G
+iodepth=32
+bs=4K
+thread=1
+stonewall=1
+
+[rand-write]
+rw=randwrite
+sqthread_poll=1
+
+[rand-read]
+rw=randread
+
+[write-opts]
+rw=write
+sqthread_poll=1
+sqthread_poll_cpu=0
+nonvectored=1
+registerfiles=1
diff --git a/examples/uring-cmd-zoned.fio b/examples/uring-cmd-zoned.fio
new file mode 100644
index 00000000..58e8f79e
--- /dev/null
+++ b/examples/uring-cmd-zoned.fio
@@ -0,0 +1,31 @@
+# io_uring_cmd I/O engine for nvme-ns generic zoned character device
+#
+# NOTE: with write workload iodepth must be set to 1 as there is no IO
+# scheduler.
+
+[global]
+filename=/dev/ng0n1
+ioengine=io_uring_cmd
+cmd_type=nvme
+zonemode=zbd
+size=1G
+iodepth=1
+bs=256K
+verify=crc32c
+stonewall=1
+
+[rand-write]
+rw=randwrite
+
+[write-opts]
+rw=write
+registerfiles=1
+sqthread_poll=1
+sqthread_poll_cpu=0
+
+[randwrite-opts]
+rw=randwrite
+sqthread_poll=1
+sqthread_poll_cpu=0
+nonvectored=1
+registerfiles=1
diff --git a/file.h b/file.h
index faf65a2a..da1b8947 100644
--- a/file.h
+++ b/file.h
@@ -126,12 +126,14 @@ struct fio_file {
 	unsigned int last_write_idx;
 
 	/*
-	 * For use by the io engine for offset or private data storage
+	 * For use by the io engine to store offset
 	 */
-	union {
-		uint64_t engine_pos;
-		void *engine_data;
-	};
+	uint64_t engine_pos;
+
+	/*
+	 * For use by the io engine for private data storage
+	 */
+	void *engine_data;
 
 	/*
 	 * if io is protected by a semaphore, this is set
diff --git a/fio.1 b/fio.1
index bdba3142..948c01f9 100644
--- a/fio.1
+++ b/fio.1
@@ -1739,6 +1739,15 @@ Basic \fBpreadv\fR\|(2) or \fBpwritev\fR\|(2) I/O.
 .B pvsync2
 Basic \fBpreadv2\fR\|(2) or \fBpwritev2\fR\|(2) I/O.
 .TP
+.B io_uring
+Fast Linux native asynchronous I/O. Supports async IO
+for both direct and buffered IO.
+This engine defines engine specific options.
+.TP
+.B io_uring_cmd
+Fast Linux native asynchronous I/O for passthrough commands.
+This engine defines engine specific options.
+.TP
 .B libaio
 Linux native asynchronous I/O. Note that Linux may only support
 queued behavior with non-buffered I/O (set `direct=1' or
@@ -2040,35 +2049,49 @@ for trim IOs are ignored. This option is mutually exclusive with the
 \fBcmdprio_percentage\fR option.
 .RE
 .TP
-.BI (io_uring)fixedbufs
+.BI (io_uring,io_uring_cmd)fixedbufs
 If fio is asked to do direct IO, then Linux will map pages for each IO call, and
 release them when IO is done. If this option is set, the pages are pre-mapped
 before IO is started. This eliminates the need to map and release for each IO.
 This is more efficient, and reduces the IO latency as well.
 .TP
-.BI (io_uring,xnvme)hipri
+.BI (io_uring,io_uring_cmd)nonvectored
+With this option, fio will use non-vectored read/write commands, where address
+must contain the address directly. Default is -1.
+.TP
+.BI (io_uring,io_uring_cmd)force_async
+Normal operation for io_uring is to try and issue an sqe as non-blocking first,
+and if that fails, execute it in an async manner. With this option set to N,
+then every N request fio will ask sqe to be issued in an async manner. Default
+is 0.
+.TP
+.BI (io_uring,io_uring_cmd,xnvme)hipri
 If this option is set, fio will attempt to use polled IO completions. Normal IO
 completions generate interrupts to signal the completion of IO, polled
 completions do not. Hence they are require active reaping by the application.
 The benefits are more efficient IO for high IOPS scenarios, and lower latencies
 for low queue depth IO.
 .TP
-.BI (io_uring)registerfiles
+.BI (io_uring,io_uring_cmd)registerfiles
 With this option, fio registers the set of files being used with the kernel.
 This avoids the overhead of managing file counts in the kernel, making the
 submission and completion part more lightweight. Required for the below
 sqthread_poll option.
 .TP
-.BI (io_uring,xnvme)sqthread_poll
+.BI (io_uring,io_uring_cmd,xnvme)sqthread_poll
 Normally fio will submit IO by issuing a system call to notify the kernel of
 available items in the SQ ring. If this option is set, the act of submitting IO
 will be done by a polling thread in the kernel. This frees up cycles for fio, at
 the cost of using more CPU in the system.
 .TP
-.BI (io_uring)sqthread_poll_cpu
+.BI (io_uring,io_uring_cmd)sqthread_poll_cpu
 When `sqthread_poll` is set, this option provides a way to define which CPU
 should be used for the polling thread.
 .TP
+.BI (io_uring_cmd)cmd_type \fR=\fPstr
+Specifies the type of uring passthrough command to be used. Supported
+value is nvme. Default is nvme.
+.TP
 .BI (libaio)userspace_reap
 Normally, with the libaio engine in use, fio will use the
 \fBio_getevents\fR\|(3) system call to reap newly returned events. With
diff --git a/init.c b/init.c
index f7d702f8..da800776 100644
--- a/init.c
+++ b/init.c
@@ -2810,6 +2810,15 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
 				break;
 
 			ret = fio_cmd_ioengine_option_parse(td, opt, val);
+
+			if (ret) {
+				if (td) {
+					put_job(td);
+					td = NULL;
+				}
+				do_exit++;
+				exit_val = 1;
+			}
 			break;
 		}
 		case 'w':
diff --git a/os/linux/io_uring.h b/os/linux/io_uring.h
index 42b2fe84..929997f8 100644
--- a/os/linux/io_uring.h
+++ b/os/linux/io_uring.h
@@ -22,6 +22,7 @@ struct io_uring_sqe {
 	union {
 		__u64	off;	/* offset into file */
 		__u64	addr2;
+		__u32	cmd_op;
 	};
 	union {
 		__u64	addr;	/* pointer to buffer or iovecs */
@@ -60,7 +61,17 @@ struct io_uring_sqe {
 		__s32	splice_fd_in;
 		__u32	file_index;
 	};
-	__u64	__pad2[2];
+	union {
+		struct {
+			__u64	addr3;
+			__u64	__pad2[1];
+		};
+		/*
+		 * If the ring is initialized with IORING_SETUP_SQE128, then
+		 * this field is used for 80 bytes of arbitrary command data
+		 */
+		__u8	cmd[0];
+	};
 };
 
 enum {
@@ -101,6 +112,24 @@ enum {
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
 #define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN	(1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Only valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)
+
+#define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */
 
 enum {
 	IORING_OP_NOP,
@@ -143,6 +172,14 @@ enum {
 	IORING_OP_MKDIRAT,
 	IORING_OP_SYMLINKAT,
 	IORING_OP_LINKAT,
+	IORING_OP_MSG_RING,
+	IORING_OP_FSETXATTR,
+	IORING_OP_SETXATTR,
+	IORING_OP_FGETXATTR,
+	IORING_OP_GETXATTR,
+	IORING_OP_SOCKET,
+	IORING_OP_URING_CMD,
+
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -192,6 +229,12 @@ struct io_uring_cqe {
 	__u64	user_data;	/* sqe->data submission passed back */
 	__s32	res;		/* result code for this event */
 	__u32	flags;
+
+	/*
+	 * If the ring is initialized with IORING_SETUP_CQE32, then this field
+	 * contains 16-bytes of padding, doubling the size of the CQE.
+	 */
+	__u64 big_cqe[];
 };
 
 /*
diff --git a/t/zbd/test-zbd-support b/t/zbd/test-zbd-support
index 7e2fff00..d4aaa813 100755
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -229,6 +229,14 @@ require_regular_block_dev() {
 	return 0
 }
 
+require_block_dev() {
+	if [[ -b "$realdev" ]]; then
+		return 0
+	fi
+	SKIP_REASON="$dev is not a block device"
+	return 1
+}
+
 require_seq_zones() {
 	local req_seq_zones=${1}
 	local seq_bytes=$((disk_size - first_sequential_zone_sector * 512))
@@ -251,8 +259,19 @@ require_conv_zones() {
 	return 0
 }
 
-# Check whether buffered writes are refused.
+require_max_open_zones() {
+	local min=${1}
+
+	if ((max_open_zones !=0 && max_open_zones < min)); then
+		SKIP_REASON="max_open_zones of $dev is smaller than $min"
+		return 1
+	fi
+	return 0
+}
+
+# Check whether buffered writes are refused for block devices.
 test1() {
+    require_block_dev || return $SKIP_TESTCASE
     run_fio --name=job1 --filename="$dev" --rw=write --direct=0 --bs=4K	\
 	    "$(ioengine "psync")" --size="${zone_size}" --thread=1	\
 	    --zonemode=zbd --zonesize="${zone_size}" 2>&1 |
@@ -453,6 +472,8 @@ test12() {
 test13() {
     local size off capacity
 
+    require_max_open_zones 4 || return $SKIP_TESTCASE
+
     prep_write
     size=$((8 * zone_size))
     off=$((first_sequential_zone_sector * 512))
diff --git a/zbd.c b/zbd.c
index b1fd6b4b..627fb968 100644
--- a/zbd.c
+++ b/zbd.c
@@ -466,7 +466,7 @@ out:
 	return res;
 }
 
-/* Verify whether direct I/O is used for all host-managed zoned drives. */
+/* Verify whether direct I/O is used for all host-managed zoned block drives. */
 static bool zbd_using_direct_io(void)
 {
 	struct thread_data *td;
@@ -477,7 +477,7 @@ static bool zbd_using_direct_io(void)
 		if (td->o.odirect || !(td->o.td_ddir & TD_DDIR_WRITE))
 			continue;
 		for_each_file(td, f, j) {
-			if (f->zbd_info &&
+			if (f->zbd_info && f->filetype == FIO_TYPE_BLOCK &&
 			    f->zbd_info->model == ZBD_HOST_MANAGED)
 				return false;
 		}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-06-01 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-06-01 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit e1aeff3ac96a51128b0493377f405e38bdc83500:

  Merge branch 'wip-lmy-rados' of https://github.com/liangmingyuanneo/fio (2022-05-29 09:32:18 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5ceed0be62f3ce8903d5747674f9f70f44e736d6:

  docs: update language setting for Sphinx build (2022-05-31 20:58:00 -0600)

----------------------------------------------------------------
Vincent Fu (1):
      docs: update language setting for Sphinx build

 doc/conf.py | 7 -------
 1 file changed, 7 deletions(-)

---

Diff of recent changes:

diff --git a/doc/conf.py b/doc/conf.py
index 10b72ecb..844f951a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -85,13 +85,6 @@ def fio_version():
 
 version, release = fio_version()
 
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
 #

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-05-30 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-05-30 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit a2840331c3cae5b2b0a13f99e58ae18375e2e40d:

  Merge branch 'master' of https://github.com/guoanwu/fio (2022-05-25 06:30:06 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to e1aeff3ac96a51128b0493377f405e38bdc83500:

  Merge branch 'wip-lmy-rados' of https://github.com/liangmingyuanneo/fio (2022-05-29 09:32:18 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'wip-lmy-rados' of https://github.com/liangmingyuanneo/fio

Vincent Fu (5):
      steadystate: delete incorrect comment
      configure: refer to zlib1g-dev package for zlib support
      HOWTO: add blank line for prettier formatting
      t/run-fio-tests: improve json data decoding
      docs: update discussion of huge page sizes

liangmingyuan (1):
      engines/ceph: add option for setting config file path

 HOWTO.rst          | 31 ++++++++++++++++++++-----------
 configure          |  2 +-
 engines/rados.c    | 13 ++++++++++++-
 examples/rados.fio |  1 +
 fio.1              | 23 ++++++++++++++---------
 steadystate.c      |  7 -------
 t/run-fio-tests.py | 20 +++++++-------------
 7 files changed, 55 insertions(+), 42 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 84bea5c5..8ab3ac4b 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1064,6 +1064,7 @@ Target file/device
 	thread/process.
 
 .. option:: ignore_zone_limits=bool
+
 	If this option is used, fio will ignore the maximum number of open
 	zones limit of the zoned block device in use, thus allowing the
 	option :option:`max_open_zones` value to be larger than the device
@@ -1822,13 +1823,14 @@ Buffers and memory
 	**mmaphuge** to work, the system must have free huge pages allocated. This
 	can normally be checked and set by reading/writing
 	:file:`/proc/sys/vm/nr_hugepages` on a Linux system. Fio assumes a huge page
-	is 4MiB in size. So to calculate the number of huge pages you need for a
-	given job file, add up the I/O depth of all jobs (normally one unless
-	:option:`iodepth` is used) and multiply by the maximum bs set. Then divide
-	that number by the huge page size. You can see the size of the huge pages in
-	:file:`/proc/meminfo`. If no huge pages are allocated by having a non-zero
-	number in `nr_hugepages`, using **mmaphuge** or **shmhuge** will fail. Also
-	see :option:`hugepage-size`.
+        is 2 or 4MiB in size depending on the platform. So to calculate the
+        number of huge pages you need for a given job file, add up the I/O
+        depth of all jobs (normally one unless :option:`iodepth` is used) and
+        multiply by the maximum bs set. Then divide that number by the huge
+        page size. You can see the size of the huge pages in
+        :file:`/proc/meminfo`. If no huge pages are allocated by having a
+        non-zero number in `nr_hugepages`, using **mmaphuge** or **shmhuge**
+        will fail. Also see :option:`hugepage-size`.
 
 	**mmaphuge** also needs to have hugetlbfs mounted and the file location
 	should point there. So if it's mounted in :file:`/huge`, you would use
@@ -1847,10 +1849,12 @@ Buffers and memory
 
 .. option:: hugepage-size=int
 
-	Defines the size of a huge page. Must at least be equal to the system
-	setting, see :file:`/proc/meminfo`. Defaults to 4MiB.  Should probably
-	always be a multiple of megabytes, so using ``hugepage-size=Xm`` is the
-	preferred way to set this to avoid setting a non-pow-2 bad value.
+        Defines the size of a huge page. Must at least be equal to the system
+        setting, see :file:`/proc/meminfo` and
+        :file:`/sys/kernel/mm/hugepages/`. Defaults to 2 or 4MiB depending on
+        the platform.  Should probably always be a multiple of megabytes, so
+        using ``hugepage-size=Xm`` is the preferred way to set this to avoid
+        setting a non-pow-2 bad value.
 
 .. option:: lockmem=int
 
@@ -2491,6 +2495,11 @@ with the caveat that when used on the command line, they must come after the
 	the full *type.id* string. If no type. prefix is given, fio will add
 	'client.' by default.
 
+.. option:: conf=str : [rados]
+
+    Specifies the path of the ceph cluster configuration file, so that the
+    conf file does not have to be /etc/ceph/ceph.conf.
+
 .. option:: busy_poll=bool : [rbd,rados]
 
         Poll store instead of waiting for completion. Usually this provides better
diff --git a/configure b/configure
index 95b60bb7..4ee536a0 100755
--- a/configure
+++ b/configure
@@ -3142,7 +3142,7 @@ if test "$libzbc" = "yes" ; then
   output_sym "CONFIG_LIBZBC"
 fi
 if test "$zlib" = "no" ; then
-  echo "Consider installing zlib-dev (zlib-devel, some fio features depend on it."
+  echo "Consider installing zlib1g-dev (zlib-devel) as some fio features depend on it."
   if test "$build_static" = "yes"; then
     echo "Note that some distros have separate packages for static libraries."
   fi
diff --git a/engines/rados.c b/engines/rados.c
index 976f9229..d0d15c5b 100644
--- a/engines/rados.c
+++ b/engines/rados.c
@@ -37,6 +37,7 @@ struct rados_options {
 	char *cluster_name;
 	char *pool_name;
 	char *client_name;
+	char *conf;
 	int busy_poll;
 	int touch_objects;
 };
@@ -69,6 +70,16 @@ static struct fio_option options[] = {
 		.category = FIO_OPT_C_ENGINE,
 		.group    = FIO_OPT_G_RBD,
 	},
+	{
+		.name     = "conf",
+		.lname    = "ceph configuration file path",
+		.type     = FIO_OPT_STR_STORE,
+		.help     = "Path of the ceph configuration file",
+		.off1     = offsetof(struct rados_options, conf),
+		.def      = "/etc/ceph/ceph.conf",
+		.category = FIO_OPT_C_ENGINE,
+		.group    = FIO_OPT_G_RBD,
+	},
 	{
 		.name     = "busy_poll",
 		.lname    = "busy poll mode",
@@ -184,7 +195,7 @@ static int _fio_rados_connect(struct thread_data *td)
 		goto failed_early;
 	}
 
-	r = rados_conf_read_file(rados->cluster, NULL);
+	r = rados_conf_read_file(rados->cluster, o->conf);
 	if (r < 0) {
 		log_err("rados_conf_read_file failed.\n");
 		goto failed_early;
diff --git a/examples/rados.fio b/examples/rados.fio
index 035cbff4..dd86f354 100644
--- a/examples/rados.fio
+++ b/examples/rados.fio
@@ -14,6 +14,7 @@
 ioengine=rados
 clientname=admin
 pool=rados
+conf=/etc/ceph/ceph.conf
 busy_poll=0
 rw=randwrite
 bs=4k
diff --git a/fio.1 b/fio.1
index ded7bbfc..bdba3142 100644
--- a/fio.1
+++ b/fio.1
@@ -1631,11 +1631,11 @@ multiplied by the I/O depth given. Note that for \fBshmhuge\fR and
 \fBmmaphuge\fR to work, the system must have free huge pages allocated. This
 can normally be checked and set by reading/writing
 `/proc/sys/vm/nr_hugepages' on a Linux system. Fio assumes a huge page
-is 4MiB in size. So to calculate the number of huge pages you need for a
-given job file, add up the I/O depth of all jobs (normally one unless
-\fBiodepth\fR is used) and multiply by the maximum bs set. Then divide
-that number by the huge page size. You can see the size of the huge pages in
-`/proc/meminfo'. If no huge pages are allocated by having a non-zero
+is 2 or 4MiB in size depending on the platform. So to calculate the number of
+huge pages you need for a given job file, add up the I/O depth of all jobs
+(normally one unless \fBiodepth\fR is used) and multiply by the maximum bs set.
+Then divide that number by the huge page size. You can see the size of the huge
+pages in `/proc/meminfo'. If no huge pages are allocated by having a non-zero
 number in `nr_hugepages', using \fBmmaphuge\fR or \fBshmhuge\fR will fail. Also
 see \fBhugepage\-size\fR.
 .P
@@ -1655,10 +1655,11 @@ of subsequent I/O memory buffers is the sum of the \fBiomem_align\fR and
 \fBbs\fR used.
 .TP
 .BI hugepage\-size \fR=\fPint
-Defines the size of a huge page. Must at least be equal to the system
-setting, see `/proc/meminfo'. Defaults to 4MiB. Should probably
-always be a multiple of megabytes, so using `hugepage\-size=Xm' is the
-preferred way to set this to avoid setting a non-pow-2 bad value.
+Defines the size of a huge page. Must at least be equal to the system setting,
+see `/proc/meminfo' and `/sys/kernel/mm/hugepages/'. Defaults to 2 or 4MiB
+depending on the platform. Should probably always be a multiple of megabytes,
+so using `hugepage\-size=Xm' is the preferred way to set this to avoid setting
+a non-pow-2 bad value.
 .TP
 .BI lockmem \fR=\fPint
 Pin the specified amount of memory with \fBmlock\fR\|(2). Can be used to
@@ -2243,6 +2244,10 @@ Ceph cluster. If the \fBclustername\fR is specified, the \fBclientname\fR shall
 the full *type.id* string. If no type. prefix is given, fio will add 'client.'
 by default.
 .TP
+.BI (rados)conf \fR=\fPstr
+Specifies the path of the ceph cluster configuration file, so that the conf
+file does not have to be /etc/ceph/ceph.conf.
+.TP
 .BI (rbd,rados)busy_poll \fR=\fPbool
 Poll store instead of waiting for completion. Usually this provides better
 throughput at cost of higher(up to 100%) CPU utilization.
diff --git a/steadystate.c b/steadystate.c
index 2e3da1db..ad19318c 100644
--- a/steadystate.c
+++ b/steadystate.c
@@ -250,13 +250,6 @@ int steadystate_check(void)
 		rate_time = mtime_since(&ss->prev_time, &now);
 		memcpy(&ss->prev_time, &now, sizeof(now));
 
-		/*
-		 * Begin monitoring when job starts but don't actually use
-		 * data in checking stopping criterion until ss->ramp_time is
-		 * over. This ensures that we will have a sane value in
-		 * prev_iops/bw the first time through after ss->ramp_time
-		 * is done.
-		 */
 		if (ss->state & FIO_SS_RAMP_OVER) {
 			group_bw += 1000 * (td_bytes - ss->prev_bytes) / rate_time;
 			group_iops += 1000 * (td_iops - ss->prev_iops) / rate_time;
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index ecceb67e..32cdbc19 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -311,21 +311,15 @@ class FioJobTest(FioExeTest):
         #
         # Sometimes fio informational messages are included at the top of the
         # JSON output, especially under Windows. Try to decode output as JSON
-        # data, lopping off up to the first four lines
+        # data, skipping everything until the first {
         #
         lines = file_data.splitlines()
-        for i in range(5):
-            file_data = '\n'.join(lines[i:])
-            try:
-                self.json_data = json.loads(file_data)
-            except json.JSONDecodeError:
-                continue
-            else:
-                logging.debug("Test %d: skipped %d lines decoding JSON data", self.testnum, i)
-                return
-
-        self.failure_reason = "{0} unable to decode JSON data,".format(self.failure_reason)
-        self.passed = False
+        file_data = '\n'.join(lines[lines.index("{"):])
+        try:
+            self.json_data = json.loads(file_data)
+        except json.JSONDecodeError:
+            self.failure_reason = "{0} unable to decode JSON data,".format(self.failure_reason)
+            self.passed = False
 
 
 class FioJobTest_t0005(FioJobTest):

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-05-26 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-05-26 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6f1a24593c227a4f392f454698aca20e95f0006c:

  Makefile: Suppress `-Wimplicit-fallthrough` when compiling `lex.yy` (2022-05-12 11:02:55 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a2840331c3cae5b2b0a13f99e58ae18375e2e40d:

  Merge branch 'master' of https://github.com/guoanwu/fio (2022-05-25 06:30:06 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'master' of https://github.com/guoanwu/fio

dennis.wu (1):
      pmemblk.c: fix one logic bug - read always with write

 engines/pmemblk.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

---

Diff of recent changes:

diff --git a/engines/pmemblk.c b/engines/pmemblk.c
index fc6358e8..849d8a15 100644
--- a/engines/pmemblk.c
+++ b/engines/pmemblk.c
@@ -375,10 +375,11 @@ static enum fio_q_status fio_pmemblk_queue(struct thread_data *td,
 		off /= pmb->pmb_bsize;
 		len /= pmb->pmb_bsize;
 		while (0 < len) {
-			if (io_u->ddir == DDIR_READ &&
-			   0 != pmemblk_read(pmb->pmb_pool, buf, off)) {
-				io_u->error = errno;
-				break;
+			if (io_u->ddir == DDIR_READ) {
+				if (0 != pmemblk_read(pmb->pmb_pool, buf, off)) {
+					io_u->error = errno;
+					break;
+				}
 			} else if (0 != pmemblk_write(pmb->pmb_pool, buf, off)) {
 				io_u->error = errno;
 				break;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-05-13 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-05-13 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 12db6deb8b767ac89dd73e34dbc6f06905441e07:

  Merge branch 'patch-1' of https://github.com/ferdnyc/fio (2022-05-01 07:29:05 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6f1a24593c227a4f392f454698aca20e95f0006c:

  Makefile: Suppress `-Wimplicit-fallthrough` when compiling `lex.yy` (2022-05-12 11:02:55 -0600)

----------------------------------------------------------------
Ammar Faizi (2):
      backend: Fix indentation
      Makefile: Suppress `-Wimplicit-fallthrough` when compiling `lex.yy`

Ankit Kumar (3):
      engines/xnvme: add xnvme engine
      docs: documentation for xnvme ioengine
      examples: add example job file for xnvme engine usage

 HOWTO.rst                  |  55 ++-
 Makefile                   |  13 +-
 backend.c                  |   2 +-
 configure                  |  22 +
 engines/xnvme.c            | 981 +++++++++++++++++++++++++++++++++++++++++++++
 examples/xnvme-compare.fio |  72 ++++
 examples/xnvme-zoned.fio   |  87 ++++
 fio.1                      |  70 +++-
 optgroup.h                 |   2 +
 options.c                  |   5 +
 10 files changed, 1302 insertions(+), 7 deletions(-)
 create mode 100644 engines/xnvme.c
 create mode 100644 examples/xnvme-compare.fio
 create mode 100644 examples/xnvme-zoned.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 6a3e09f5..84bea5c5 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2171,6 +2171,12 @@ I/O engine
 		**exec**
 			Execute 3rd party tools. Could be used to perform monitoring during jobs runtime.
 
+		**xnvme**
+			I/O engine using the xNVMe C API, for NVMe devices. The xnvme engine provides
+			flexibility to access GNU/Linux Kernel NVMe driver via libaio, IOCTLs, io_uring,
+			the SPDK NVMe driver, or your own custom NVMe driver. The xnvme engine includes
+			engine specific options. (See https://xnvme.io).
+
 I/O engine specific parameters
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -2260,7 +2266,7 @@ with the caveat that when used on the command line, they must come after the
 	making the submission and completion part more lightweight. Required
 	for the below :option:`sqthread_poll` option.
 
-.. option:: sqthread_poll : [io_uring]
+.. option:: sqthread_poll : [io_uring] [xnvme]
 
 	Normally fio will submit IO by issuing a system call to notify the
 	kernel of available items in the SQ ring. If this option is set, the
@@ -2275,7 +2281,7 @@ with the caveat that when used on the command line, they must come after the
 
 .. option:: hipri
 
-   [io_uring]
+   [io_uring], [xnvme]
 
         If this option is set, fio will attempt to use polled IO completions.
         Normal IO completions generate interrupts to signal the completion of
@@ -2725,6 +2731,51 @@ with the caveat that when used on the command line, they must come after the
 
 	If set, stdout and stderr streams are redirected to files named from the job name. Default is true.
 
+.. option:: xnvme_async=str : [xnvme]
+
+	Select the xnvme async command interface. This can take these values.
+
+	**emu**
+		This is default and used to emulate asynchronous I/O.
+	**thrpool**
+		Use thread pool for Asynchronous I/O.
+	**io_uring**
+		Use Linux io_uring/liburing for Asynchronous I/O.
+	**libaio**
+		Use Linux aio for Asynchronous I/O.
+	**posix**
+		Use POSIX aio for Asynchronous I/O.
+	**nil**
+		Use nil-io; For introspective perf. evaluation
+
+.. option:: xnvme_sync=str : [xnvme]
+
+	Select the xnvme synchronous command interface. This can take these values.
+
+	**nvme**
+		This is default and uses Linux NVMe Driver ioctl() for synchronous I/O.
+	**psync**
+		Use pread()/pwrite() for synchronous I/O.
+
+.. option:: xnvme_admin=str : [xnvme]
+
+	Select the xnvme admin command interface. This can take these values.
+
+	**nvme**
+		This is default and uses Linux NVMe Driver ioctl() for admin commands.
+	**block**
+		Use Linux Block Layer ioctl() and sysfs for admin commands.
+	**file_as_ns**
+		Use file-stat to construct NVMe idfy responses.
+
+.. option:: xnvme_dev_nsid=int : [xnvme]
+
+	xnvme namespace identifier, for userspace NVMe driver.
+
+.. option:: xnvme_iovec=int : [xnvme]
+
+	If this option is set, xnvme will use vectored read/write commands.
+
 I/O depth
 ~~~~~~~~~
 
diff --git a/Makefile b/Makefile
index e670c1f2..ed66305a 100644
--- a/Makefile
+++ b/Makefile
@@ -223,7 +223,12 @@ ifdef CONFIG_LIBZBC
   libzbc_LIBS = -lzbc
   ENGINES += libzbc
 endif
-
+ifdef CONFIG_LIBXNVME
+  xnvme_SRCS = engines/xnvme.c
+  xnvme_LIBS = $(LIBXNVME_LIBS)
+  xnvme_CFLAGS = $(LIBXNVME_CFLAGS)
+  ENGINES += xnvme
+endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \
 		oslib/linux-dev-lookup.c engines/io_uring.c
@@ -530,8 +535,12 @@ else
 	$(QUIET_LEX)$(LEX) $<
 endif
 
+ifneq (,$(findstring -Wimplicit-fallthrough,$(CFLAGS)))
+LEX_YY_CFLAGS := -Wno-implicit-fallthrough
+endif
+
 lex.yy.o: lex.yy.c y.tab.h
-	$(QUIET_CC)$(CC) -o $@ $(CFLAGS) $(CPPFLAGS) -c $<
+	$(QUIET_CC)$(CC) -o $@ $(CFLAGS) $(CPPFLAGS) $(LEX_YY_CFLAGS) -c $<
 
 y.tab.o: y.tab.c y.tab.h
 	$(QUIET_CC)$(CC) -o $@ $(CFLAGS) $(CPPFLAGS) -c $<
diff --git a/backend.c b/backend.c
index ffbb7e2a..e5bb4e25 100644
--- a/backend.c
+++ b/backend.c
@@ -2021,7 +2021,7 @@ static void reap_threads(unsigned int *nr_running, uint64_t *t_rate,
 	for_each_td(td, i) {
 		int flags = 0;
 
-		 if (!strcmp(td->o.ioengine, "cpuio"))
+		if (!strcmp(td->o.ioengine, "cpuio"))
 			cputhreads++;
 		else
 			realthreads++;
diff --git a/configure b/configure
index d327d2ca..95b60bb7 100755
--- a/configure
+++ b/configure
@@ -171,6 +171,7 @@ march_set="no"
 libiscsi="no"
 libnbd="no"
 libnfs="no"
+xnvme="no"
 libzbc=""
 dfs=""
 dynamic_engines="no"
@@ -240,6 +241,8 @@ for opt do
   ;;
   --disable-libzbc) libzbc="no"
   ;;
+  --enable-xnvme) xnvme="yes"
+  ;;
   --disable-tcmalloc) disable_tcmalloc="yes"
   ;;
   --disable-nfs) disable_nfs="yes"
@@ -291,6 +294,7 @@ if test "$show_help" = "yes" ; then
   echo "--with-ime=             Install path for DDN's Infinite Memory Engine"
   echo "--enable-libiscsi       Enable iscsi support"
   echo "--enable-libnbd         Enable libnbd (NBD engine) support"
+  echo "--enable-xnvme          Enable xnvme support"
   echo "--disable-libzbc        Disable libzbc even if found"
   echo "--disable-tcmalloc      Disable tcmalloc support"
   echo "--dynamic-libengines    Lib-based ioengines as dynamic libraries"
@@ -2583,6 +2587,19 @@ if test "$libzbc" != "no" ; then
 fi
 print_config "libzbc engine" "$libzbc"
 
+##########################################
+# Check if we have xnvme
+if test "$xnvme" != "yes" ; then
+  if check_min_lib_version xnvme 0.2.0; then
+    xnvme="yes"
+    xnvme_cflags=$(pkg-config --cflags xnvme)
+    xnvme_libs=$(pkg-config --libs xnvme)
+  else
+    xnvme="no"
+  fi
+fi
+print_config "xnvme engine" "$xnvme"
+
 ##########################################
 # check march=armv8-a+crc+crypto
 if test "$march_armv8_a_crc_crypto" != "yes" ; then
@@ -3190,6 +3207,11 @@ if test "$libnfs" = "yes" ; then
   echo "LIBNFS_CFLAGS=$libnfs_cflags" >> $config_host_mak
   echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak
 fi
+if test "$xnvme" = "yes" ; then
+  output_sym "CONFIG_LIBXNVME"
+  echo "LIBXNVME_CFLAGS=$xnvme_cflags" >> $config_host_mak
+  echo "LIBXNVME_LIBS=$xnvme_libs" >> $config_host_mak
+fi
 if test "$dynamic_engines" = "yes" ; then
   output_sym "CONFIG_DYNAMIC_ENGINES"
 fi
diff --git a/engines/xnvme.c b/engines/xnvme.c
new file mode 100644
index 00000000..c11b33a8
--- /dev/null
+++ b/engines/xnvme.c
@@ -0,0 +1,981 @@
+/*
+ * fio xNVMe IO Engine
+ *
+ * IO engine using the xNVMe C API.
+ *
+ * See: http://xnvme.io/
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stdlib.h>
+#include <assert.h>
+#include <libxnvme.h>
+#include <libxnvme_libconf.h>
+#include <libxnvme_nvm.h>
+#include <libxnvme_znd.h>
+#include <libxnvme_spec_fs.h>
+#include "fio.h"
+#include "zbd_types.h"
+#include "optgroup.h"
+
+static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;
+
+struct xnvme_fioe_fwrap {
+	/* fio file representation */
+	struct fio_file *fio_file;
+
+	/* xNVMe device handle */
+	struct xnvme_dev *dev;
+	/* xNVMe device geometry */
+	const struct xnvme_geo *geo;
+
+	struct xnvme_queue *queue;
+
+	uint32_t ssw;
+	uint32_t lba_nbytes;
+
+	uint8_t _pad[24];
+};
+XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
+
+struct xnvme_fioe_data {
+	/* I/O completion queue */
+	struct io_u **iocq;
+
+	/* # of iocq entries; incremented via getevents()/cb_pool() */
+	uint64_t completed;
+
+	/*
+	 *  # of errors; incremented when observed on completion via
+	 *  getevents()/cb_pool()
+	 */
+	uint64_t ecount;
+
+	/* Controller which device/file to select */
+	int32_t prev;
+	int32_t cur;
+
+	/* Number of devices/files for which open() has been called */
+	int64_t nopen;
+	/* Number of devices/files allocated in files[] */
+	uint64_t nallocated;
+
+	struct iovec *iovec;
+
+	uint8_t _pad[8];
+
+	struct xnvme_fioe_fwrap files[];
+};
+XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
+
+struct xnvme_fioe_options {
+	void *padding;
+	unsigned int hipri;
+	unsigned int sqpoll_thread;
+	unsigned int xnvme_dev_nsid;
+	unsigned int xnvme_iovec;
+	char *xnvme_be;
+	char *xnvme_async;
+	char *xnvme_sync;
+	char *xnvme_admin;
+};
+
+static struct fio_option options[] = {
+	{
+		.name = "hipri",
+		.lname = "High Priority",
+		.type = FIO_OPT_STR_SET,
+		.off1 = offsetof(struct xnvme_fioe_options, hipri),
+		.help = "Use polled IO completions",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "sqthread_poll",
+		.lname = "Kernel SQ thread polling",
+		.type = FIO_OPT_STR_SET,
+		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
+		.help = "Offload submission/completion to kernel thread",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_be",
+		.lname = "xNVMe Backend",
+		.type = FIO_OPT_STR_STORE,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
+		.help = "Select xNVMe backend [spdk,linux,fbsd]",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_async",
+		.lname = "xNVMe Asynchronous command-interface",
+		.type = FIO_OPT_STR_STORE,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
+		.help = "Select xNVMe async. interface: [emu,thrpool,io_uring,libaio,posix,nil]",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_sync",
+		.lname = "xNVMe Synchronous. command-interface",
+		.type = FIO_OPT_STR_STORE,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
+		.help = "Select xNVMe sync. interface: [nvme,psync]",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_admin",
+		.lname = "xNVMe Admin command-interface",
+		.type = FIO_OPT_STR_STORE,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
+		.help = "Select xNVMe admin. cmd-interface: [nvme,block,file_as_ns]",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_dev_nsid",
+		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
+		.type = FIO_OPT_INT,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
+		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+	{
+		.name = "xnvme_iovec",
+		.lname = "Vectored IOs",
+		.type = FIO_OPT_STR_SET,
+		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
+		.help = "Send vectored IOs",
+		.category = FIO_OPT_C_ENGINE,
+		.group = FIO_OPT_G_XNVME,
+	},
+
+	{
+		.name = NULL,
+	},
+};
+
+static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
+{
+	struct io_u *io_u = cb_arg;
+	struct xnvme_fioe_data *xd = io_u->mmap_data;
+
+	if (xnvme_cmd_ctx_cpl_status(ctx)) {
+		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
+		xd->ecount += 1;
+		io_u->error = EIO;
+	}
+
+	xd->iocq[xd->completed++] = io_u;
+	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
+}
+
+static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
+{
+	struct xnvme_fioe_options *o = td->eo;
+	struct xnvme_opts opts = xnvme_opts_default();
+
+	opts.nsid = o->xnvme_dev_nsid;
+	opts.be = o->xnvme_be;
+	opts.async = o->xnvme_async;
+	opts.sync = o->xnvme_sync;
+	opts.admin = o->xnvme_admin;
+
+	opts.poll_io = o->hipri;
+	opts.poll_sq = o->sqpoll_thread;
+
+	opts.direct = td->o.odirect;
+
+	return opts;
+}
+
+static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
+{
+	if (fwrap->dev)
+		xnvme_queue_term(fwrap->queue);
+
+	xnvme_dev_close(fwrap->dev);
+
+	memset(fwrap, 0, sizeof(*fwrap));
+}
+
+/* Close every wrapped device and free the engine data; no-op when none is attached */
+static void xnvme_fioe_cleanup(struct thread_data *td)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+	int err;
+
+	if (!xd)
+		return;
+
+	err = pthread_mutex_lock(&g_serialize);
+	if (err) /* NOTE: not returning here; the devices are closed regardless */
+		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
+	for (uint64_t i = 0; i < xd->nallocated; ++i)
+		_dev_close(td, &xd->files[i]);
+	if (!err) {
+		err = pthread_mutex_unlock(&g_serialize);
+		if (err)
+			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
+	}
+	free(xd->iocq);
+	free(xd->iovec);
+	free(xd);
+	td->io_ops_data = NULL;
+}
+
+/**
+ * Open the device named by `f` and set up its queue in the files[] slot
+ * selected by `f->fileno`, using options derived from the thread's engine
+ * options (e.g. ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``).
+ *
+ * Returns 0 on success and non-zero on failure; on failure the slot is cleared.
+ */
+static int _dev_open(struct thread_data *td, struct fio_file *f)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+	struct xnvme_fioe_fwrap *fwrap;
+	int flags = 0;
+	int err;
+
+	if (f->fileno > (int)xd->nallocated) {
+		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
+		return 1;
+	}
+
+	fwrap = &xd->files[f->fileno];
+
+	err = pthread_mutex_lock(&g_serialize);
+	if (err) {
+		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
+			err);
+		return -err;
+	}
+
+	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
+	if (!fwrap->dev) {
+		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
+		goto failure;
+	}
+	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);
+
+	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
+		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
+		goto failure;
+	}
+	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);
+
+	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
+	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
+
+	fwrap->fio_file = f;
+	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
+	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
+	fio_file_set_size_known(fwrap->fio_file);
+
+	err = pthread_mutex_unlock(&g_serialize);
+	if (err)
+		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
+			err);
+
+	return 0;
+
+failure:
+	xnvme_queue_term(fwrap->queue);
+	xnvme_dev_close(fwrap->dev);
+	/* Drop the stale handles so later users of files[] never see a closed device */
+	memset(fwrap, 0, sizeof(*fwrap));
+	err = pthread_mutex_unlock(&g_serialize);
+	if (err)
+		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
+			err);
+	return 1;
+}
+
+/* Allocate the per-thread engine data and open every file of the job; needs --thread=1 */
+static int xnvme_fioe_init(struct thread_data *td)
+{
+	struct xnvme_fioe_data *xd = NULL;
+	struct fio_file *f;
+	unsigned int i;
+
+	if (!td->o.use_thread) {
+		log_err("ioeng->init(): --thread=1 is required\n");
+		return 1;
+	}
+
+	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
+	if (!xd) {
+		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
+		return 1;
+	}
+
+	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
+	if (!xd->iocq) {
+		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
+		free(xd); /* not yet attached to td, so it must be released here */
+		return 1;
+	}
+
+	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
+	if (!xd->iovec) {
+		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
+		free(xd->iocq);
+		free(xd); /* not yet attached to td, so it must be released here */
+		return 1;
+	}
+
+	xd->prev = -1;
+	td->io_ops_data = xd;
+
+	for_each_file(td, f, i) {
+		if (_dev_open(td, f)) {
+			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
+			return 1;
+		}
+		++(xd->nallocated);
+	}
+	if (xd->nallocated != td->o.nr_files) {
+		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/* NOTE: using the first device for buffer-allocators */
+static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
+
+	if (!fwrap->dev) {
+		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
+		return 1;
+	}
+
+	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);
+
+	return td->orig_buffer == NULL;
+}
+
+/* NOTE: using the first device for buffer-allocators; no-op without engine data */
+static void xnvme_fioe_iomem_free(struct thread_data *td)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+
+	if (!xd)
+		return;
+	if (!xd->files[0].dev) {
+		log_err("ioeng->iomem_free(): failed no dev-handle\n");
+		return;
+	}
+	xnvme_buf_free(xd->files[0].dev, td->orig_buffer);
+}
+
+static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+	io_u->mmap_data = td->io_ops_data;
+
+	return 0;
+}
+
+static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+	io_u->mmap_data = NULL;
+}
+
+static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+
+	assert(event >= 0);
+	assert((unsigned)event < xd->completed);
+
+	return xd->iocq[event];
+}
+
+/**
+ * Reap completions by poking the queues of the wrapped files round-robin,
+ * resuming with the file after the one polled last (xd->prev).
+ *
+ * Polls until at least `min` completions were collected into xd->iocq by
+ * cb_pool(), asking for at most `max` in total. The timeout `t` is not used.
+ *
+ * Returns the number of completions; an unhandled queue error asserts.
+ */
+static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
+				const struct timespec *t)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+	struct xnvme_fioe_fwrap *fwrap = NULL;
+	int nfiles = xd->nallocated;
+	int err;
+
+	if (xd->prev != -1 && ++xd->prev < nfiles) {
+		fwrap = &xd->files[xd->prev];
+		xd->cur = xd->prev;
+	}
+
+	xd->completed = 0;
+	for (;;) {
+		if (fwrap == NULL || xd->cur == nfiles) {
+			fwrap = &xd->files[0];
+			xd->cur = 0;
+		}
+
+		/*
+		 * NOTE: the poke result must not gate this loop; once a transient
+		 * -EBUSY/-EAGAIN was seen, no queue would ever be poked again.
+		 */
+		while (fwrap != NULL && xd->cur < nfiles) {
+			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
+			if (err < 0) {
+				switch (err) {
+				case -EBUSY:
+				case -EAGAIN:
+					usleep(1);
+					break;
+
+				default:
+					log_err("ioeng->getevents(): unhandled IO error\n");
+					assert(false);
+					return 0;
+				}
+			}
+			if (xd->completed >= min) {
+				xd->prev = xd->cur;
+				return xd->completed;
+			}
+			xd->cur++;
+			fwrap = &xd->files[xd->cur];
+		}
+	}
+}
+
+/* Submit `io_u` as an NVMe read or write on the queue of its file; other ddirs fail */
+static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+	struct xnvme_fioe_fwrap *fwrap;
+	struct xnvme_cmd_ctx *ctx;
+	uint32_t nsid;
+	uint64_t slba;
+	uint16_t nlb;
+	int err;
+	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;
+
+	fio_ro_check(td, io_u);
+
+	fwrap = &xd->files[io_u->file->fileno];
+	nsid = xnvme_dev_get_nsid(fwrap->dev);
+
+	slba = io_u->offset >> fwrap->ssw;
+	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
+
+	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
+	ctx->async.cb_arg = io_u;
+
+	ctx->cmd.common.nsid = nsid;
+	ctx->cmd.nvm.slba = slba;
+	ctx->cmd.nvm.nlb = nlb;
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
+		break;
+
+	case DDIR_WRITE:
+		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
+		break;
+
+	default:
+		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
+		/* Hand the context back; no command is submitted without an opcode */
+		xnvme_queue_put_cmd_ctx(fwrap->queue, ctx);
+		io_u->error = ENOSYS;
+		assert(false);
+		return FIO_Q_COMPLETED;
+	}
+
+	if (vectored_io) {
+		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
+		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
+		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
+				      0);
+	} else {
+		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
+	}
+	switch (err) {
+	case 0:
+		return FIO_Q_QUEUED;
+
+	case -EBUSY:
+	case -EAGAIN:
+		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
+		return FIO_Q_BUSY;
+
+	default:
+		log_err("ioeng->queue(): err: '%d'\n", err);
+		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
+		io_u->error = abs(err);
+		assert(false);
+		return FIO_Q_COMPLETED;
+	}
+}
+
+static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+
+	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);
+
+	--(xd->nopen);
+
+	return 0;
+}
+
+/* Account an open of `f`; the device handle itself is set up by _dev_open() */
+static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
+{
+	struct xnvme_fioe_data *xd = td->io_ops_data;
+
+	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);
+	/* Only files[0 .. nallocated - 1] have been set up */
+	if (f->fileno >= (int)xd->nallocated) {
+		log_err("ioeng->open(): f->fileno >= xd->nallocated; invalid assumption\n");
+		return 1;
+	}
+	if (xd->files[f->fileno].fio_file != f) {
+		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
+		return 1;
+	}
+
+	++(xd->nopen);
+	return 0;
+}
+
+static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
+{
+	/* Consider only doing this with be:spdk */
+	return 0;
+}
+
+/* Report the max. number of open zones of `f` via `max_open_zones`; 0 or negative errno */
+static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+					 unsigned int *max_open_zones)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	struct xnvme_dev *dev;
+	const struct xnvme_spec_znd_idfy_ns *zns;
+	int err = 0, err_lock;
+
+	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
+	    f->filetype != FIO_TYPE_CHAR) {
+		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
+		return 0;
+	}
+	err_lock = pthread_mutex_lock(&g_serialize);
+	if (err_lock) {
+		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
+		return -err_lock;
+	}
+
+	dev = xnvme_dev_open(f->file_name, &opts);
+	if (!dev) {
+		err = -errno;
+		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
+		goto exit;
+	}
+	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
+		errno = EINVAL;
+		err = -errno;
+		goto exit;
+	}
+	zns = (void *)xnvme_dev_get_ns_css(dev);
+	if (!zns) {
+		err = -errno;
+		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
+		goto exit;
+	}
+
+	/*
+	 * intentional overflow as the value is zero-based and NVMe
+	 * defines 0xFFFFFFFF as unlimited thus overflowing to 0 which
+	 * is how fio indicates unlimited and otherwise just converting
+	 * to one-based.
+	 */
+	*max_open_zones = zns->mor + 1;
+
+exit:
+	xnvme_dev_close(dev);
+	err_lock = pthread_mutex_unlock(&g_serialize);
+	if (err_lock)
+		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
+			err_lock);
+
+	return err;
+}
+
+/**
+ * Currently, this function is called before I/O engine initialization, so
+ * we cannot consult the file-wrapping done when 'fioe' initializes.
+ * Instead we just open based on the given filename.
+ *
+ * TODO: unify the different setup methods, consider keeping the handle around,
+ * and consider how to support the --be option in this usecase
+ */
+static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
+				      enum zbd_zoned_model *model)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	struct xnvme_dev *dev;
+	int err = 0, err_lock;
+
+	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
+	    f->filetype != FIO_TYPE_CHAR) {
+		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
+		return -EINVAL;
+	}
+
+	err = pthread_mutex_lock(&g_serialize);
+	if (err) {
+		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
+		return -err;
+	}
+
+	dev = xnvme_dev_open(f->file_name, &opts);
+	if (!dev) {
+		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
+			f->file_name, errno);
+		err = -errno;
+		goto exit;
+	}
+
+	switch (xnvme_dev_get_geo(dev)->type) {
+	case XNVME_GEO_UNKNOWN:
+		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
+		*model = ZBD_NONE;
+		break;
+
+	case XNVME_GEO_CONVENTIONAL:
+		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
+		*model = ZBD_NONE;
+		break;
+
+	case XNVME_GEO_ZONED:
+		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
+		*model = ZBD_HOST_MANAGED;
+		break;
+
+	default:
+		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
+		*model = ZBD_NONE;
+		errno = EINVAL;
+		err = -errno;
+		break;
+	}
+
+exit:
+	xnvme_dev_close(dev);
+
+	err_lock = pthread_mutex_unlock(&g_serialize);
+	if (err_lock)
+		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);
+
+	return err;
+}
+
+/**
+ * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors and
+ * returns the number of descriptors filled, or a negative errno on failure.
+ *
+ * The implementation converts the NVMe Zoned Command Set log-pages for Zone
+ * descriptors into the Linux Kernel Zoned Block Report format.
+ *
+ * NOTE: This function is called before I/O engine initialization, that is,
+ * before ``_dev_open`` has been called and file-wrapping is set up. Thus it
+ * has to open the device itself, and close it again once the report is built.
+ *
+ * TODO: unify the different setup methods, consider keeping the handle around,
+ * and consider how to support the --async option in this usecase
+ */
+static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
+				   struct zbd_zone *zbdz, unsigned int nr_zones)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
+	struct xnvme_dev *dev = NULL;
+	const struct xnvme_geo *geo = NULL;
+	struct xnvme_znd_report *rprt = NULL;
+	uint32_t ssw;
+	uint64_t slba;
+	unsigned int limit = 0;
+	int err = 0, err_lock;
+
+	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
+	       nr_zones);
+
+	err = pthread_mutex_lock(&g_serialize);
+	if (err) {
+		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
+			err);
+		return -err;
+	}
+
+	dev = xnvme_dev_open(f->file_name, &opts);
+	if (!dev) {
+		err = -errno;
+		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
+			errno);
+		goto exit;
+	}
+
+	geo = xnvme_dev_get_geo(dev);
+	ssw = xnvme_dev_get_ssw(dev);
+	lbafe = xnvme_znd_dev_get_lbafe(dev);
+
+	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;
+	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);
+	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;
+
+	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
+	if (!rprt) {
+		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
+			f->file_name, errno);
+		err = -errno;
+		goto exit;
+	}
+	if (rprt->nentries != limit) {
+		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
+		err = -EIO;
+		goto exit;
+	}
+	if (offset > geo->tbytes) {
+		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* Transform the zone-report */
+	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
+		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);
+
+		zbdz[idx].start = descr->zslba << ssw;
+		zbdz[idx].len = lbafe->zsze << ssw;
+		zbdz[idx].capacity = descr->zcap << ssw;
+		zbdz[idx].wp = descr->wp << ssw;
+
+		switch (descr->zt) {
+		case XNVME_SPEC_ZND_TYPE_SEQWR:
+			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
+			break;
+
+		default:
+			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
+				f->file_name, zbdz[idx].start);
+			err = -EIO;
+			goto exit;
+		}
+
+		switch (descr->zs) {
+		case XNVME_SPEC_ZND_STATE_EMPTY:
+			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
+			break;
+		case XNVME_SPEC_ZND_STATE_IOPEN:
+			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
+			break;
+		case XNVME_SPEC_ZND_STATE_EOPEN:
+			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
+			break;
+		case XNVME_SPEC_ZND_STATE_CLOSED:
+			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
+			break;
+		case XNVME_SPEC_ZND_STATE_FULL:
+			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
+			break;
+
+		case XNVME_SPEC_ZND_STATE_RONLY:
+		case XNVME_SPEC_ZND_STATE_OFFLINE:
+		default:
+			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
+			break;
+		}
+	}
+
+exit:
+	xnvme_buf_virt_free(rprt);
+
+	xnvme_dev_close(dev);
+
+	err_lock = pthread_mutex_unlock(&g_serialize);
+	if (err_lock)
+		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);
+
+	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);
+
+	return err ? err : (int)limit;
+}
+
+/**
+ * Resets the write pointer of the zones from the one containing ``offset`` up
+ * to, but excluding, the one containing ``offset + length``.
+ * NOTE: May get called before I/O engine initialization; in that case the
+ * device is opened here and closed again once the zones have been reset.
+ */
+static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
+			       uint64_t length)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	struct xnvme_fioe_data *xd = NULL;
+	struct xnvme_fioe_fwrap *fwrap = NULL;
+	struct xnvme_dev *dev = NULL;
+	const struct xnvme_geo *geo = NULL;
+	uint64_t first, last;
+	uint32_t ssw;
+	uint32_t nsid;
+	int err = 0, err_lock;
+
+	if (td->io_ops_data) {
+		xd = td->io_ops_data;
+		fwrap = &xd->files[f->fileno];
+
+		assert(fwrap->dev);
+		assert(fwrap->geo);
+		dev = fwrap->dev;
+		geo = fwrap->geo;
+		ssw = fwrap->ssw;
+	} else {
+		err = pthread_mutex_lock(&g_serialize);
+		if (err) {
+			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
+			return -err;
+		}
+
+		dev = xnvme_dev_open(f->file_name, &opts);
+		if (!dev) {
+			err = -errno;
+			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
+				f->file_name, errno);
+			goto exit;
+		}
+		geo = xnvme_dev_get_geo(dev);
+		ssw = xnvme_dev_get_ssw(dev);
+	}
+
+	nsid = xnvme_dev_get_nsid(dev);
+
+	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
+	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
+	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);
+
+	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
+		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);
+
+		if (zslba >= (geo->nsect * geo->nzone)) {
+			log_err("ioeng->reset_wp(): out-of-bounds\n");
+			err = 0;
+			break;
+		}
+
+		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
+					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
+		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
+			err = err ? err : -EIO;
+			log_err("ioeng->reset_wp(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
+			goto exit;
+		}
+	}
+
+exit:
+	if (!td->io_ops_data) {
+		xnvme_dev_close(dev);
+
+		err_lock = pthread_mutex_unlock(&g_serialize);
+		if (err_lock)
+			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
+	}
+
+	return err;
+}
+
+/* Set the size of `f` to the device capacity unless it is known already; 0 or negative errno */
+static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
+	struct xnvme_dev *dev;
+	int ret = 0, err;
+
+	if (fio_file_size_known(f))
+		return 0;
+
+	ret = pthread_mutex_lock(&g_serialize);
+	if (ret) {
+		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
+		return -ret;
+	}
+
+	dev = xnvme_dev_open(f->file_name, &opts);
+	if (!dev) {
+		ret = -errno;
+		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
+		goto exit;
+	}
+	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
+	fio_file_set_size_known(f);
+	f->filetype = FIO_TYPE_BLOCK;
+
+exit:
+	xnvme_dev_close(dev);
+	err = pthread_mutex_unlock(&g_serialize);
+	if (err)
+		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);
+
+	return ret;
+}
+
+FIO_STATIC struct ioengine_ops ioengine = {
+	.name = "xnvme",
+	.version = FIO_IOOPS_VERSION,
+	.options = options,
+	.option_struct_size = sizeof(struct xnvme_fioe_options),
+	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,
+
+	.cleanup = xnvme_fioe_cleanup,
+	.init = xnvme_fioe_init,
+
+	.iomem_free = xnvme_fioe_iomem_free,
+	.iomem_alloc = xnvme_fioe_iomem_alloc,
+
+	.io_u_free = xnvme_fioe_io_u_free,
+	.io_u_init = xnvme_fioe_io_u_init,
+
+	.event = xnvme_fioe_event,
+	.getevents = xnvme_fioe_getevents,
+	.queue = xnvme_fioe_queue,
+
+	.close_file = xnvme_fioe_close,
+	.open_file = xnvme_fioe_open,
+	.get_file_size = xnvme_fioe_get_file_size,
+
+	.invalidate = xnvme_fioe_invalidate,
+	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
+	.get_zoned_model = xnvme_fioe_get_zoned_model,
+	.report_zones = xnvme_fioe_report_zones,
+	.reset_wp = xnvme_fioe_reset_wp,
+};
+
+static void fio_init fio_xnvme_register(void)
+{
+	register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_xnvme_unregister(void)
+{
+	unregister_ioengine(&ioengine);
+}
diff --git a/examples/xnvme-compare.fio b/examples/xnvme-compare.fio
new file mode 100644
index 00000000..b89dfdf4
--- /dev/null
+++ b/examples/xnvme-compare.fio
@@ -0,0 +1,72 @@
+; Compare fio IO engines with a random-read workload using BS=4k at QD=1
+;
+; README
+;
+; This job-file is intended to be used as:
+;
+; # Use the built-in io_uring engine to get baseline numbers
+; fio examples/xnvme-compare.fio \
+;   --section=default \
+;   --ioengine=io_uring \
+;   --sqthread_poll=1 \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with Linux backend and io_uring async. impl.
+; fio examples/xnvme-compare.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --sqthread_poll=1 \
+;   --xnvme_async=io_uring \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with Linux backend and libaio async. impl.
+; fio examples/xnvme-compare.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --xnvme_async=libaio \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with SPDK backend, note that you have to set the Namespace-id
+; fio examples/xnvme-compare.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --xnvme_dev_nsid=1 \
+;   --filename=0000\\:01\\:00.0
+;
+; NOTE: The URI encoded in the filename above, the ":" must be escaped.
+;
+; On the command-line using two "\\":
+;
+; --filename=0000\\:01\\:00.0
+;
+; Within a fio-script using a single "\":
+;
+; filename=0000\:01\:00.0
+;
+; NOTE: If you want to override the default bs, iodepth, and workload, then
+; invoke it as:
+;
+; FIO_BS="512" FIO_RW="verify" FIO_IODEPTH=16 fio examples/xnvme-compare.fio \
+;   --section=override
+;
+[global]
+rw=randread
+size=12G
+iodepth=1
+bs=4K
+direct=1
+thread=1
+time_based=1
+runtime=7
+ramp_time=3
+norandommap=1
+
+; Avoid accidentally creating device files; e.g. "/dev/nvme0n1", "/dev/nullb0"
+allow_file_create=0
+
+[default]
+
+[override]
+rw=${FIO_RW}
+iodepth=${FIO_IODEPTH}
+bs=${FIO_BS}
diff --git a/examples/xnvme-zoned.fio b/examples/xnvme-zoned.fio
new file mode 100644
index 00000000..1344f9a1
--- /dev/null
+++ b/examples/xnvme-zoned.fio
@@ -0,0 +1,87 @@
+; Running xNVMe/fio on a Zoned Device
+;
+; Writes 1GB at QD1 using 4K BS and verifies it.
+;
+; README
+;
+; This job-file is intended to be used as:
+;
+; # Use the built-in io_uring engine to get baseline numbers
+; fio examples/xnvme-zoned.fio \
+;   --section=default \
+;   --ioengine=io_uring \
+;   --sqthread_poll=1 \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with Linux backend and io_uring async. impl.
+; fio examples/xnvme-zoned.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --sqthread_poll=1 \
+;   --xnvme_async=io_uring \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with Linux backend and libaio async. impl.
+; fio examples/xnvme-zoned.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --xnvme_async=libaio \
+;   --filename=/dev/nvme0n1
+;
+; # Use the xNVMe io-engine engine with SPDK backend, note that you have to set the Namespace-id
+; fio examples/xnvme-zoned.fio \
+;   --section=default \
+;   --ioengine=xnvme \
+;   --xnvme_dev_nsid=1 \
+;   --filename=0000\\:01\\:00.0
+;
+; NOTE: The URI encoded in the filename above, the ":" must be escaped.
+;
+; On the command-line using two "\\":
+;
+; --filename=0000\\:01\\:00.0
+;
+; Within a fio-script using a single "\":
+;
+; filename=0000\:01\:00.0
+;
+; NOTE: If you want to override the default bs, iodepth, and workload, then
+; invoke it as:
+;
+; FIO_BS="512" FIO_RW="verify" FIO_IODEPTH=16 fio examples/xnvme-zoned.fio \
+;   --section=override
+;
+; To reset all zones on the device to EMPTY state aka. wipe the entire device.
+;
+; # zoned mgmt-reset /dev/nvme0n2 --slba 0x0 --all
+;
+[global]
+zonemode=zbd
+rw=write
+size=1G
+iodepth=1
+bs=4K
+direct=1
+thread=1
+ramp_time=1
+norandommap=1
+verify=crc32c
+; Avoid accidentally creating device files; e.g. "/dev/nvme0n1", "/dev/nullb0"
+allow_file_create=0
+;
+; NOTE: If fio complains about zone-size, then run:
+;
+; # zoned info /dev/nvme0n1
+;
+; The command will provide the values you need, then in the fio-script define:
+;
+; zonesize=nsect * nbytes
+;
+;zonesize=
+
+[default]
+
+[override]
+rw=${FIO_RW}
+iodepth=${FIO_IODEPTH}
+bs=${FIO_BS}
diff --git a/fio.1 b/fio.1
index 609947dc..ded7bbfc 100644
--- a/fio.1
+++ b/fio.1
@@ -1965,6 +1965,12 @@ via kernel NFS.
 .TP
 .B exec
 Execute 3rd party tools. Could be used to perform monitoring during jobs runtime.
+.TP
+.B xnvme
+I/O engine using the xNVMe C API, for NVMe devices. The xnvme engine provides
+flexibility to access GNU/Linux Kernel NVMe driver via libaio, IOCTLs, io_uring,
+the SPDK NVMe driver, or your own custom NVMe driver. The xnvme engine includes
+engine specific options. (See \fIhttps://xnvme.io/\fR).
 .SS "I/O engine specific parameters"
 In addition, there are some parameters which are only valid when a specific
 \fBioengine\fR is in use. These are used identically to normal parameters,
@@ -2039,7 +2045,7 @@ release them when IO is done. If this option is set, the pages are pre-mapped
 before IO is started. This eliminates the need to map and release for each IO.
 This is more efficient, and reduces the IO latency as well.
 .TP
-.BI (io_uring)hipri
+.BI (io_uring,xnvme)hipri
 If this option is set, fio will attempt to use polled IO completions. Normal IO
 completions generate interrupts to signal the completion of IO, polled
 completions do not. Hence they are require active reaping by the application.
@@ -2052,7 +2058,7 @@ This avoids the overhead of managing file counts in the kernel, making the
 submission and completion part more lightweight. Required for the below
 sqthread_poll option.
 .TP
-.BI (io_uring)sqthread_poll
+.BI (io_uring,xnvme)sqthread_poll
 Normally fio will submit IO by issuing a system call to notify the kernel of
 available items in the SQ ring. If this option is set, the act of submitting IO
 will be done by a polling thread in the kernel. This frees up cycles for fio, at
@@ -2480,6 +2486,66 @@ Defines the time between the SIGTERM and SIGKILL signals. Default is 1 second.
 .TP
 .BI (exec)std_redirect\fR=\fbool
 If set, stdout and stderr streams are redirected to files named from the job name. Default is true.
+.TP
+.BI (xnvme)xnvme_async\fR=\fPstr
+Select the xnvme async command interface. This can take these values.
+.RS
+.RS
+.TP
+.B emu
+This is default and used to emulate asynchronous I/O
+.TP
+.BI thrpool
+Use thread pool for Asynchronous I/O
+.TP
+.BI io_uring
+Use Linux io_uring/liburing for Asynchronous I/O
+.TP
+.BI libaio
+Use Linux aio for Asynchronous I/O
+.TP
+.BI posix
+Use POSIX aio for Asynchronous I/O
+.TP
+.BI nil
+Use nil-io; For introspective perf. evaluation
+.RE
+.RE
+.TP
+.BI (xnvme)xnvme_sync\fR=\fPstr
+Select the xnvme synchronous command interface. This can take these values.
+.RS
+.RS
+.TP
+.B nvme
+This is default and uses Linux NVMe Driver ioctl() for synchronous I/O
+.TP
+.BI psync
+Use pread()/pwrite() for synchronous I/O
+.RE
+.RE
+.TP
+.BI (xnvme)xnvme_admin\fR=\fPstr
+Select the xnvme admin command interface. This can take these values.
+.RS
+.RS
+.TP
+.B nvme
+This is default and uses Linux NVMe Driver ioctl() for admin commands
+.TP
+.BI block
+Use Linux Block Layer ioctl() and sysfs for admin commands
+.TP
+.BI file_as_ns
+Use file-stat to construct NVMe idfy responses
+.RE
+.RE
+.TP
+.BI (xnvme)xnvme_dev_nsid\fR=\fPint
+xnvme namespace identifier, for userspace NVMe driver.
+.TP
+.BI (xnvme)xnvme_iovec
+If this option is set, xnvme will use vectored read/write commands.
 .SS "I/O depth"
 .TP
 .BI iodepth \fR=\fPint
diff --git a/optgroup.h b/optgroup.h
index 3ac8f62a..dc73c8f3 100644
--- a/optgroup.h
+++ b/optgroup.h
@@ -72,6 +72,7 @@ enum opt_category_group {
 	__FIO_OPT_G_DFS,
 	__FIO_OPT_G_NFS,
 	__FIO_OPT_G_WINDOWSAIO,
+	__FIO_OPT_G_XNVME,
 
 	FIO_OPT_G_RATE		= (1ULL << __FIO_OPT_G_RATE),
 	FIO_OPT_G_ZONE		= (1ULL << __FIO_OPT_G_ZONE),
@@ -118,6 +119,7 @@ enum opt_category_group {
 	FIO_OPT_G_LIBCUFILE	= (1ULL << __FIO_OPT_G_LIBCUFILE),
 	FIO_OPT_G_DFS		= (1ULL << __FIO_OPT_G_DFS),
 	FIO_OPT_G_WINDOWSAIO	= (1ULL << __FIO_OPT_G_WINDOWSAIO),
+	FIO_OPT_G_XNVME         = (1ULL << __FIO_OPT_G_XNVME),
 };
 
 extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
diff --git a/options.c b/options.c
index 3b83573b..2b183c60 100644
--- a/options.c
+++ b/options.c
@@ -2144,6 +2144,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 			  { .ival = "nfs",
 			    .help = "NFS IO engine",
 			  },
+#endif
+#ifdef CONFIG_LIBXNVME
+			  { .ival = "xnvme",
+			    .help = "XNVME IO engine",
+			  },
 #endif
 		},
 	},

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-05-02 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-05-02 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6e594a2fa8388892dffb2ffc9b865689e2d67833:

  Merge branch 'global_dedup' of https://github.com/bardavid/fio (2022-04-29 16:30:50 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 12db6deb8b767ac89dd73e34dbc6f06905441e07:

  Merge branch 'patch-1' of https://github.com/ferdnyc/fio (2022-05-01 07:29:05 -0600)

----------------------------------------------------------------
Frank Dana (1):
      README: Update Fedora pkg URL

Jens Axboe (1):
      Merge branch 'patch-1' of https://github.com/ferdnyc/fio

 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/README.rst b/README.rst
index d566fae3..527f33ab 100644
--- a/README.rst
+++ b/README.rst
@@ -107,7 +107,7 @@ Ubuntu:
 Red Hat, Fedora, CentOS & Co:
 	Starting with Fedora 9/Extra Packages for Enterprise Linux 4, fio
 	packages are part of the Fedora/EPEL repositories.
-	https://apps.fedoraproject.org/packages/fio .
+	https://packages.fedoraproject.org/pkgs/fio/ .
 
 Mandriva:
 	Mandriva has integrated fio into their package repository, so installing

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-30 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-30 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 5f2d43188c2d65674aaba6280e2a87107e5d7099:

  Merge branch 'fix/json/strdup_memory_leak' of https://github.com/dpronin/fio (2022-04-17 16:47:22 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6e594a2fa8388892dffb2ffc9b865689e2d67833:

  Merge branch 'global_dedup' of https://github.com/bardavid/fio (2022-04-29 16:30:50 -0600)

----------------------------------------------------------------
Bar David (2):
      Introducing support for generation of dedup buffers across jobs. The dedup buffers are spread evenly between the jobs that enabled the dedupe_global option
      adding an example for dedupe_global usage and DRR testing

Jens Axboe (1):
      Merge branch 'global_dedup' of https://github.com/bardavid/fio

 HOWTO.rst                  |  6 +++++
 backend.c                  |  5 ++++
 cconv.c                    |  2 ++
 dedupe.c                   | 46 +++++++++++++++++++++++++++++++++----
 dedupe.h                   |  3 ++-
 examples/dedupe-global.fio | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 fio.1                      |  9 ++++++++
 init.c                     |  2 +-
 options.c                  | 10 ++++++++
 server.h                   |  2 +-
 thread_options.h           |  3 +++
 11 files changed, 138 insertions(+), 7 deletions(-)
 create mode 100644 examples/dedupe-global.fio

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index a5fa432e..6a3e09f5 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1749,6 +1749,12 @@ Buffers and memory
 	Note that size needs to be explicitly provided and only 1 file per
 	job is supported
 
+.. option:: dedupe_global=bool
+
+	This controls whether the deduplication buffers will be shared amongst
+	all jobs that have this option set. The buffers are spread evenly between
+	participating jobs.
+
 .. option:: invalidate=bool
 
 	Invalidate the buffer/page cache parts of the files to be used prior to
diff --git a/backend.c b/backend.c
index 317e4f6c..ffbb7e2a 100644
--- a/backend.c
+++ b/backend.c
@@ -2570,6 +2570,11 @@ int fio_backend(struct sk_out *sk_out)
 		setup_log(&agg_io_log[DDIR_TRIM], &p, "agg-trim_bw.log");
 	}
 
+	if (init_global_dedupe_working_set_seeds()) {
+		log_err("fio: failed to initialize global dedupe working set\n");
+		return 1;
+	}
+
 	startup_sem = fio_sem_init(FIO_SEM_LOCKED);
 	if (!sk_out)
 		is_local_backend = true;
diff --git a/cconv.c b/cconv.c
index 62d02e36..6c36afb7 100644
--- a/cconv.c
+++ b/cconv.c
@@ -305,6 +305,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage);
 	o->dedupe_mode = le32_to_cpu(top->dedupe_mode);
 	o->dedupe_working_set_percentage = le32_to_cpu(top->dedupe_working_set_percentage);
+	o->dedupe_global = le32_to_cpu(top->dedupe_global);
 	o->block_error_hist = le32_to_cpu(top->block_error_hist);
 	o->replay_align = le32_to_cpu(top->replay_align);
 	o->replay_scale = le32_to_cpu(top->replay_scale);
@@ -513,6 +514,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 	top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage);
 	top->dedupe_mode = cpu_to_le32(o->dedupe_mode);
 	top->dedupe_working_set_percentage = cpu_to_le32(o->dedupe_working_set_percentage);
+	top->dedupe_global = cpu_to_le32(o->dedupe_global);
 	top->block_error_hist = cpu_to_le32(o->block_error_hist);
 	top->replay_align = cpu_to_le32(o->replay_align);
 	top->replay_scale = cpu_to_le32(o->replay_scale);
diff --git a/dedupe.c b/dedupe.c
index fd116dfb..8214a786 100644
--- a/dedupe.c
+++ b/dedupe.c
@@ -1,13 +1,37 @@
 #include "fio.h"
 
-int init_dedupe_working_set_seeds(struct thread_data *td)
+/**
+ * initializes the global dedup workset.
+ * this needs to be called after all jobs' seeds
+ * have been initialized
+ */
+int init_global_dedupe_working_set_seeds(void)
 {
-	unsigned long long i, j, num_seed_advancements;
+	int i;
+	struct thread_data *td;
+
+	for_each_td(td, i) {
+		if (!td->o.dedupe_global)
+			continue;
+
+		if (init_dedupe_working_set_seeds(td, 1))
+			return 1;
+	}
+
+	return 0;
+}
+
+int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup)
+{
+	int tindex;
+	struct thread_data *td_seed;
+	unsigned long long i, j, num_seed_advancements, pages_per_seed;
 	struct frand_state dedupe_working_set_state = {0};
 
 	if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
 		return 0;
 
+	tindex = td->thread_number - 1;
 	num_seed_advancements = td->o.min_bs[DDIR_WRITE] /
 		min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk);
 	/*
@@ -20,9 +44,11 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
 		log_err("fio: could not allocate dedupe working set\n");
 		return 1;
 	}
+
 	frand_copy(&dedupe_working_set_state, &td->buf_state);
-	for (i = 0; i < td->num_unique_pages; i++) {
-		frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
+	frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state);
+	pages_per_seed = max(td->num_unique_pages / thread_number, 1ull);
+	for (i = 1; i < td->num_unique_pages; i++) {
 		/*
 		 * When compression is used the seed is advanced multiple times to
 		 * generate the buffer. We want to regenerate the same buffer when
@@ -30,6 +56,18 @@ int init_dedupe_working_set_seeds(struct thread_data *td)
 		 */
 		for (j = 0; j < num_seed_advancements; j++)
 			__get_next_seed(&dedupe_working_set_state);
+
+		/*
+		 * When global dedup is used, we rotate the seeds to allow
+		 * generating same buffers across different jobs. Deduplication buffers
+		 * are spread evenly across jobs participating in global dedupe
+		 */
+		if (global_dedup && i % pages_per_seed == 0) {
+			td_seed = tnumber_to_td(++tindex % thread_number);
+			frand_copy(&dedupe_working_set_state, &td_seed->buf_state);
+		}
+
+		frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
 	}
 
 	return 0;
diff --git a/dedupe.h b/dedupe.h
index d4c4dc37..bd1f9c0c 100644
--- a/dedupe.h
+++ b/dedupe.h
@@ -1,6 +1,7 @@
 #ifndef DEDUPE_H
 #define DEDUPE_H
 
-int init_dedupe_working_set_seeds(struct thread_data *td);
+int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedupe);
+int init_global_dedupe_working_set_seeds(void);
 
 #endif
diff --git a/examples/dedupe-global.fio b/examples/dedupe-global.fio
new file mode 100644
index 00000000..edaaad55
--- /dev/null
+++ b/examples/dedupe-global.fio
@@ -0,0 +1,57 @@
+# Writing to 2 files that share the duplicate blocks.
+# The dedupe working set is spread uniformly such that when
+# each of the jobs chooses to perform a dedup operation they will
+# regenerate a buffer from the global space.
+# If you test the dedup ratio on either file by itself the result
+# is likely lower than if you test the ratio of the two files combined.
+#
+# Use `./t/fio-dedupe <file> -C 1 -c 1 -b 4096` to test the total
+# data reduction ratio.
+#
+#
+# Full example of test:
+# $ ./fio ./examples/dedupe-global.fio
+#
+# Checking ratio on a and b individually:
+# $ ./t/fio-dedupe a.0.0 -C 1 -c 1 -b 4096
+#
+# $ Extents=25600, Unique extents=16817 Duplicated extents=5735
+# $ De-dupe ratio: 1:0.52
+# $ De-dupe working set at least: 22.40%
+# $ Fio setting: dedupe_percentage=34
+# $ Unique capacity 33MB
+#
+# ./t/fio-dedupe b.0.0 -C 1 -c 1 -b 4096
+# $ Extents=25600, Unique extents=17009 Duplicated extents=5636
+# $ De-dupe ratio: 1:0.51
+# $ De-dupe working set at least: 22.02%
+# $ Fio setting: dedupe_percentage=34
+# $ Unique capacity 34MB
+#
+# Combining files:
+# $ cat a.0.0 > c.0.0
+# $ cat b.0.0 >> c.0.0
+#
+# Checking data reduction ratio on combined file:
+# $ ./t/fio-dedupe c.0.0 -C 1 -c 1 -b 4096
+# $ Extents=51200, Unique extents=25747 Duplicated extents=11028
+# $ De-dupe ratio: 1:0.99
+# $ De-dupe working set at least: 21.54%
+# $ Fio setting: dedupe_percentage=50
+# $ Unique capacity 51MB
+#
+[global]
+ioengine=libaio
+iodepth=256
+size=100m
+dedupe_mode=working_set
+dedupe_global=1
+dedupe_percentage=50
+blocksize=4k
+rw=write
+buffer_compress_percentage=50
+dedupe_working_set_percentage=50
+
+[a]
+
+[b]
diff --git a/fio.1 b/fio.1
index a2ec836f..609947dc 100644
--- a/fio.1
+++ b/fio.1
@@ -1553,6 +1553,15 @@ Note that \fBsize\fR needs to be explicitly provided and only 1 file
 per job is supported
 .RE
 .TP
+.BI dedupe_global \fR=\fPbool
+This controls whether the deduplication buffers will be shared amongst
+all jobs that have this option set. The buffers are spread evenly between
+participating jobs.
+.P
+.RS
+Note that \fBdedupe_mode\fR must be set to \fBworking_set\fR for this to work.
+Can be used in combination with compression
+.TP
 .BI invalidate \fR=\fPbool
 Invalidate the buffer/page cache parts of the files to be used prior to
 starting I/O if the platform and file type support it. Defaults to true.
diff --git a/init.c b/init.c
index 6f186051..f7d702f8 100644
--- a/init.c
+++ b/init.c
@@ -1541,7 +1541,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 	if (fixup_options(td))
 		goto err;
 
-	if (init_dedupe_working_set_seeds(td))
+	if (!td->o.dedupe_global && init_dedupe_working_set_seeds(td, 0))
 		goto err;
 
 	/*
diff --git a/options.c b/options.c
index e06d9b66..3b83573b 100644
--- a/options.c
+++ b/options.c
@@ -4665,6 +4665,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_IO_BUF,
 	},
+	{
+		.name	= "dedupe_global",
+		.lname	= "Global deduplication",
+		.type	= FIO_OPT_BOOL,
+		.off1	= offsetof(struct thread_options, dedupe_global),
+		.help	= "Share deduplication buffers across jobs",
+		.def	= "0",
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_IO_BUF,
+	},
 	{
 		.name	= "dedupe_mode",
 		.lname	= "Dedupe mode",
diff --git a/server.h b/server.h
index 0e62b6df..b0c5e2df 100644
--- a/server.h
+++ b/server.h
@@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-	FIO_SERVER_VER			= 96,
+	FIO_SERVER_VER			= 97,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
diff --git a/thread_options.h b/thread_options.h
index 4162c42f..634070af 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -263,6 +263,7 @@ struct thread_options {
 	unsigned int dedupe_percentage;
 	unsigned int dedupe_mode;
 	unsigned int dedupe_working_set_percentage;
+	unsigned int dedupe_global;
 	unsigned int time_based;
 	unsigned int disable_lat;
 	unsigned int disable_clat;
@@ -578,6 +579,7 @@ struct thread_options_pack {
 	uint32_t dedupe_percentage;
 	uint32_t dedupe_mode;
 	uint32_t dedupe_working_set_percentage;
+	uint32_t dedupe_global;
 	uint32_t time_based;
 	uint32_t disable_lat;
 	uint32_t disable_clat;
@@ -596,6 +598,7 @@ struct thread_options_pack {
 	uint32_t lat_percentiles;
 	uint32_t slat_percentiles;
 	uint32_t percentile_precision;
+	uint32_t pad5;
 	fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
 	uint8_t read_iolog_file[FIO_TOP_STR_MAX];

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-18 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-18 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit d684bb2839d1fa010fba1e64f9b0c16240d8bdae:

  Merge branch 'fix/remove-sudo-in-test-script' of https://github.com/dpronin/fio (2022-04-10 15:18:42 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5f2d43188c2d65674aaba6280e2a87107e5d7099:

  Merge branch 'fix/json/strdup_memory_leak' of https://github.com/dpronin/fio (2022-04-17 16:47:22 -0600)

----------------------------------------------------------------
Denis Pronin (5):
      fixed possible and actual memory leaks
      fixed memory leak of not freed jobs_eta in several cases
      use flist_first_entry instead of flist_entry applied to 'next' list item
      fixed bunch of memory leaks in json constructor
      updated logging of iops1, iops2, ratio in FioJobTest_iops_rate

Jens Axboe (3):
      Merge branch 'fix/memory-leak' of https://github.com/dpronin/fio
      Merge branch 'fix/jobs_eta_memory_leak' of https://github.com/dpronin/fio
      Merge branch 'fix/json/strdup_memory_leak' of https://github.com/dpronin/fio

 backend.c          | 3 +++
 eta.c              | 7 ++++---
 ioengines.c        | 2 ++
 json.h             | 7 ++++++-
 server.c           | 2 +-
 stat.c             | 2 ++
 t/run-fio-tests.py | 3 ++-
 7 files changed, 20 insertions(+), 6 deletions(-)

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index 001b2b96..317e4f6c 100644
--- a/backend.c
+++ b/backend.c
@@ -2433,8 +2433,10 @@ reap:
 			} else {
 				pid_t pid;
 				struct fio_file **files;
+				void *eo;
 				dprint(FD_PROCESS, "will fork\n");
 				files = td->files;
+				eo = td->eo;
 				read_barrier();
 				pid = fork();
 				if (!pid) {
@@ -2447,6 +2449,7 @@ reap:
 				// freeing previously allocated memory for files
 				// this memory freed MUST NOT be shared between processes, only the pointer itself may be shared within TD
 				free(files);
+				free(eo);
 				free(fd);
 				fd = NULL;
 			}
diff --git a/eta.c b/eta.c
index 17970c78..6017ca31 100644
--- a/eta.c
+++ b/eta.c
@@ -3,6 +3,7 @@
  */
 #include <unistd.h>
 #include <string.h>
+#include <stdlib.h>
 #ifdef CONFIG_VALGRIND_DEV
 #include <valgrind/drd.h>
 #else
@@ -707,10 +708,10 @@ void print_thread_status(void)
 	size_t size;
 
 	je = get_jobs_eta(false, &size);
-	if (je)
+	if (je) {
 		display_thread_status(je);
-
-	free(je);
+		free(je);
+	}
 }
 
 void print_status_init(int thr_number)
diff --git a/ioengines.c b/ioengines.c
index d08a511a..68f307e5 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -223,6 +223,8 @@ struct ioengine_ops *load_ioengine(struct thread_data *td)
  */
 void free_ioengine(struct thread_data *td)
 {
+	assert(td != NULL && td->io_ops != NULL);
+
 	dprint(FD_IO, "free ioengine %s\n", td->io_ops->name);
 
 	if (td->eo && td->io_ops->options) {
diff --git a/json.h b/json.h
index d9824263..66bb06b1 100644
--- a/json.h
+++ b/json.h
@@ -81,8 +81,13 @@ static inline int json_object_add_value_string(struct json_object *obj,
 	struct json_value arg = {
 		.type = JSON_TYPE_STRING,
 	};
+	union {
+		const char *a;
+		char *b;
+	} string;
 
-	arg.string = strdup(val ? : "");
+	string.a = val ? val : "";
+	arg.string = string.b;
 	return json_object_add_value_type(obj, name, &arg);
 }
 
diff --git a/server.c b/server.c
index 914a8c74..4c71bd44 100644
--- a/server.c
+++ b/server.c
@@ -1323,7 +1323,7 @@ static int handle_xmits(struct sk_out *sk_out)
 	sk_unlock(sk_out);
 
 	while (!flist_empty(&list)) {
-		entry = flist_entry(list.next, struct sk_entry, list);
+		entry = flist_first_entry(&list, struct sk_entry, list);
 		flist_del(&entry->list);
 		ret += handle_sk_entry(sk_out, entry);
 	}
diff --git a/stat.c b/stat.c
index 356083e2..949af5ed 100644
--- a/stat.c
+++ b/stat.c
@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <math.h>
@@ -1698,6 +1699,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
 	if (je) {
 		json_object_add_value_int(root, "eta", je->eta_sec);
 		json_object_add_value_int(root, "elapsed", je->elapsed_sec);
+		free(je);
 	}
 
 	if (opt_list)
diff --git a/t/run-fio-tests.py b/t/run-fio-tests.py
index 612e50ca..ecceb67e 100755
--- a/t/run-fio-tests.py
+++ b/t/run-fio-tests.py
@@ -546,9 +546,10 @@ class FioJobTest_iops_rate(FioJobTest):
             return
 
         iops1 = self.json_data['jobs'][0]['read']['iops']
+        logging.debug("Test %d: iops1: %f", self.testnum, iops1)
         iops2 = self.json_data['jobs'][1]['read']['iops']
+        logging.debug("Test %d: iops2: %f", self.testnum, iops2)
         ratio = iops2 / iops1
-        logging.debug("Test %d: iops1: %f", self.testnum, iops1)
         logging.debug("Test %d: ratio: %f", self.testnum, ratio)
 
         if iops1 < 950 or iops1 > 1050:

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-11 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-11 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6d01ac19170fadaf46a6db6b4cc347f1b389f422:

  iolog: Use %llu for 64-bit (2022-04-08 12:46:44 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to d684bb2839d1fa010fba1e64f9b0c16240d8bdae:

  Merge branch 'fix/remove-sudo-in-test-script' of https://github.com/dpronin/fio (2022-04-10 15:18:42 -0600)

----------------------------------------------------------------
Denis Pronin (1):
      actions-full-test.sh, removed sudo from the script

Jens Axboe (1):
      Merge branch 'fix/remove-sudo-in-test-script' of https://github.com/dpronin/fio

 ci/actions-full-test.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/ci/actions-full-test.sh b/ci/actions-full-test.sh
index 91790664..8282002f 100755
--- a/ci/actions-full-test.sh
+++ b/ci/actions-full-test.sh
@@ -6,9 +6,9 @@ main() {
     echo "Running long running tests..."
     export PYTHONUNBUFFERED="TRUE"
     if [[ "${CI_TARGET_ARCH}" == "arm64" ]]; then
-        sudo python3 t/run-fio-tests.py --skip 6 1007 1008 --debug -p 1010:"--skip 15 16 17 18 19 20"
+        python3 t/run-fio-tests.py --skip 6 1007 1008 --debug -p 1010:"--skip 15 16 17 18 19 20"
     else
-        sudo python3 t/run-fio-tests.py --skip 6 1007 1008 --debug
+        python3 t/run-fio-tests.py --skip 6 1007 1008 --debug
     fi
     make -C doc html
 }

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-09 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-09 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit a3e48f483db27d20e02cbd81e3a8f18c6c5c50f5:

  Fio 3.30 (2022-04-06 17:10:00 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6d01ac19170fadaf46a6db6b4cc347f1b389f422:

  iolog: Use %llu for 64-bit (2022-04-08 12:46:44 -0600)

----------------------------------------------------------------
Jens Axboe (2):
      iolog: fix warning for 32-bit compilation
      iolog: Use %llu for 64-bit

Mohamad Gebai (3):
      iolog: add version 3 to support timestamp-based replay
      iolog: add iolog_write for version 3
      iolog: update man page for version 3

 HOWTO.rst  |  29 +++++++++++++++-
 blktrace.c |  17 ++--------
 fio.1      |  35 +++++++++++++++++++-
 fio.h      |   4 ++-
 iolog.c    | 109 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 iolog.h    |   8 ++---
 6 files changed, 158 insertions(+), 44 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index 0978879c..a5fa432e 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -4398,7 +4398,9 @@ given in bytes. The `action` can be one of these:
 
 **wait**
 	   Wait for `offset` microseconds. Everything below 100 is discarded.
-	   The time is relative to the previous `wait` statement.
+	   The time is relative to the previous `wait` statement. Note that
+	   action `wait` is not allowed as of version 3, as the same behavior
+	   can be achieved using timestamps.
 **read**
 	   Read `length` bytes beginning from `offset`.
 **write**
@@ -4411,6 +4413,31 @@ given in bytes. The `action` can be one of these:
 	   Trim the given file from the given `offset` for `length` bytes.
 
 
+Trace file format v3
+~~~~~~~~~~~~~~~~~~~~
+
+The third version of the trace file format was added in fio version 3.31. It
+forces each action to have a timestamp associated with it.
+
+The first line of the trace file has to be::
+
+    fio version 3 iolog
+
+Following this can be lines in two different formats, which are described below.
+
+The file management format::
+
+    timestamp filename action
+
+The file I/O action format::
+
+    timestamp filename action offset length
+
+The `timestamp` is relative to the beginning of the run (i.e., starts at 0). The
+`filename`, `action`, `offset` and `length` are identical to version 2, except
+that version 3 does not allow the `wait` action.
+
+
 I/O Replay - Merging Traces
 ---------------------------
 
diff --git a/blktrace.c b/blktrace.c
index ead60130..619121c7 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -313,25 +313,14 @@ static bool queue_trace(struct thread_data *td, struct blk_io_trace *t,
 			 unsigned long *ios, unsigned long long *bs,
 			 struct file_cache *cache)
 {
-	unsigned long long *last_ttime = &td->io_log_blktrace_last_ttime;
+	unsigned long long *last_ttime = &td->io_log_last_ttime;
 	unsigned long long delay = 0;
 
 	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
 		return false;
 
 	if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-		if (!*last_ttime || td->o.no_stall || t->time < *last_ttime)
-			delay = 0;
-		else if (td->o.replay_time_scale == 100)
-			delay = t->time - *last_ttime;
-		else {
-			double tmp = t->time - *last_ttime;
-			double scale;
-
-			scale = (double) 100.0 / (double) td->o.replay_time_scale;
-			tmp *= scale;
-			delay = tmp;
-		}
+		delay = delay_since_ttime(td, t->time);
 		*last_ttime = t->time;
 	}
 
@@ -422,7 +411,7 @@ bool init_blktrace_read(struct thread_data *td, const char *filename, int need_s
 		goto err;
 	}
 	td->io_log_blktrace_swap = need_swap;
-	td->io_log_blktrace_last_ttime = 0;
+	td->io_log_last_ttime = 0;
 	td->o.size = 0;
 
 	free_release_files(td);
diff --git a/fio.1 b/fio.1
index 98410655..a2ec836f 100644
--- a/fio.1
+++ b/fio.1
@@ -4117,7 +4117,9 @@ given in bytes. The `action' can be one of these:
 .TP
 .B wait
 Wait for `offset' microseconds. Everything below 100 is discarded.
-The time is relative to the previous `wait' statement.
+The time is relative to the previous `wait' statement. Note that action `wait`
+is not allowed as of version 3, as the same behavior can be achieved using
+timestamps.
 .TP
 .B read
 Read `length' bytes beginning from `offset'.
@@ -4135,6 +4137,37 @@ Write `length' bytes beginning from `offset'.
 Trim the given file from the given `offset' for `length' bytes.
 .RE
 .RE
+.RE
+.TP
+.B Trace file format v3
+The third version of the trace file format was added in fio version 3.31. It
+forces each action to have a timestamp associated with it.
+.RS
+.P
+The first line of the trace file has to be:
+.RS
+.P
+"fio version 3 iolog"
+.RE
+.P
+Following this can be lines in two different formats, which are described below.
+.P
+.B
+The file management format:
+.RS
+timestamp filename action
+.P
+.RE
+.B
+The file I/O action format:
+.RS
+timestamp filename action offset length
+.P
+The `timestamp` is relative to the beginning of the run (i.e., starts at 0). The
+`filename`, `action`, `offset` and `length` are identical to version 2, except
+that version 3 does not allow the `wait` action.
+.RE
+.RE
 .SH I/O REPLAY \- MERGING TRACES
 Colocation is a common practice used to get the most out of a machine.
 Knowing which workloads play nicely with each other and which ones don't is
diff --git a/fio.h b/fio.h
index 776fb51f..de7eca79 100644
--- a/fio.h
+++ b/fio.h
@@ -431,10 +431,12 @@ struct thread_data {
 	FILE *io_log_rfile;
 	unsigned int io_log_blktrace;
 	unsigned int io_log_blktrace_swap;
-	unsigned long long io_log_blktrace_last_ttime;
+	unsigned long long io_log_last_ttime;
+	struct timespec io_log_start_time;
 	unsigned int io_log_current;
 	unsigned int io_log_checkmark;
 	unsigned int io_log_highmark;
+	unsigned int io_log_version;
 	struct timespec io_log_highmark_time;
 
 	/*
diff --git a/iolog.c b/iolog.c
index 724ec1fe..37e799a1 100644
--- a/iolog.c
+++ b/iolog.c
@@ -31,6 +31,7 @@
 static int iolog_flush(struct io_log *log);
 
 static const char iolog_ver2[] = "fio version 2 iolog";
+static const char iolog_ver3[] = "fio version 3 iolog";
 
 void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
 {
@@ -40,18 +41,24 @@ void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
 
 void log_io_u(const struct thread_data *td, const struct io_u *io_u)
 {
+	struct timespec now;
+
 	if (!td->o.write_iolog_file)
 		return;
 
-	fprintf(td->iolog_f, "%s %s %llu %llu\n", io_u->file->file_name,
-						io_ddir_name(io_u->ddir),
-						io_u->offset, io_u->buflen);
+	fio_gettime(&now, NULL);
+	fprintf(td->iolog_f, "%llu %s %s %llu %llu\n",
+		(unsigned long long) utime_since_now(&td->io_log_start_time),
+		io_u->file->file_name, io_ddir_name(io_u->ddir), io_u->offset,
+		io_u->buflen);
+
 }
 
 void log_file(struct thread_data *td, struct fio_file *f,
 	      enum file_log_act what)
 {
 	const char *act[] = { "add", "open", "close" };
+	struct timespec now;
 
 	assert(what < 3);
 
@@ -65,7 +72,10 @@ void log_file(struct thread_data *td, struct fio_file *f,
 	if (!td->iolog_f)
 		return;
 
-	fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
+	fio_gettime(&now, NULL);
+	fprintf(td->iolog_f, "%llu %s %s\n",
+		(unsigned long long) utime_since_now(&td->io_log_start_time),
+		f->file_name, act[what]);
 }
 
 static void iolog_delay(struct thread_data *td, unsigned long delay)
@@ -116,6 +126,10 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo)
 
 	f = td->files[ipo->fileno];
 
+	if (ipo->delay)
+		iolog_delay(td, ipo->delay);
+	if (fio_fill_issue_time(td))
+		fio_gettime(&td->last_issue, NULL);
 	switch (ipo->file_action) {
 	case FIO_LOG_OPEN_FILE:
 		if (td->o.replay_redirect && fio_file_open(f)) {
@@ -134,6 +148,11 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo)
 	case FIO_LOG_UNLINK_FILE:
 		td_io_unlink_file(td, f);
 		break;
+	case FIO_LOG_ADD_FILE:
+		/*
+		 * Nothing to do
+		 */
+		break;
 	default:
 		log_err("fio: bad file action %d\n", ipo->file_action);
 		break;
@@ -142,7 +161,25 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo)
 	return 1;
 }
 
-static bool read_iolog2(struct thread_data *td);
+static bool read_iolog(struct thread_data *td);
+
+unsigned long long delay_since_ttime(const struct thread_data *td,
+	       unsigned long long time)
+{
+	double tmp;
+	double scale;
+	const unsigned long long *last_ttime = &td->io_log_last_ttime;
+
+	if (!*last_ttime || td->o.no_stall || time < *last_ttime)
+		return 0;
+	else if (td->o.replay_time_scale == 100)
+		return time - *last_ttime;
+
+
+	scale = (double) 100.0 / (double) td->o.replay_time_scale;
+	tmp = time - *last_ttime;
+	return tmp * scale;
+}
 
 int read_iolog_get(struct thread_data *td, struct io_u *io_u)
 {
@@ -158,7 +195,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u)
 					if (!read_blktrace(td))
 						return 1;
 				} else {
-					if (!read_iolog2(td))
+					if (!read_iolog(td))
 						return 1;
 				}
 			}
@@ -388,14 +425,20 @@ int64_t iolog_items_to_fetch(struct thread_data *td)
 	return items_to_fetch;
 }
 
+#define io_act(_td, _r) (((_td)->io_log_version == 3 && (r) == 5) || \
+					((_td)->io_log_version == 2 && (r) == 4))
+#define file_act(_td, _r) (((_td)->io_log_version == 3 && (r) == 3) || \
+					((_td)->io_log_version == 2 && (r) == 2))
+
 /*
- * Read version 2 iolog data. It is enhanced to include per-file logging,
+ * Read version 2 and 3 iolog data. It is enhanced to include per-file logging,
  * syncs, etc.
  */
-static bool read_iolog2(struct thread_data *td)
+static bool read_iolog(struct thread_data *td)
 {
 	unsigned long long offset;
 	unsigned int bytes;
+	unsigned long long delay = 0;
 	int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
 	char *rfname, *fname, *act;
 	char *str, *p;
@@ -422,14 +465,28 @@ static bool read_iolog2(struct thread_data *td)
 	while ((p = fgets(str, 4096, td->io_log_rfile)) != NULL) {
 		struct io_piece *ipo;
 		int r;
+		unsigned long long ttime;
 
-		r = sscanf(p, "%256s %256s %llu %u", rfname, act, &offset,
-									&bytes);
+		if (td->io_log_version == 3) {
+			r = sscanf(p, "%llu %256s %256s %llu %u", &ttime, rfname, act,
+							&offset, &bytes);
+			delay = delay_since_ttime(td, ttime);
+			td->io_log_last_ttime = ttime;
+			/*
+			 * "wait" is not allowed with version 3
+			 */
+			if (!strcmp(act, "wait")) {
+				log_err("iolog: ignoring wait command with"
+					" version 3 for file %s\n", fname);
+				continue;
+			}
+		} else /* version 2 */
+			r = sscanf(p, "%256s %256s %llu %u", rfname, act, &offset, &bytes);
 
 		if (td->o.replay_redirect)
 			fname = td->o.replay_redirect;
 
-		if (r == 4) {
+		if (io_act(td, r)) {
 			/*
 			 * Check action first
 			 */
@@ -451,7 +508,7 @@ static bool read_iolog2(struct thread_data *td)
 				continue;
 			}
 			fileno = get_fileno(td, fname);
-		} else if (r == 2) {
+		} else if (file_act(td, r)) {
 			rw = DDIR_INVAL;
 			if (!strcmp(act, "add")) {
 				if (td->o.replay_redirect &&
@@ -462,7 +519,6 @@ static bool read_iolog2(struct thread_data *td)
 					fileno = add_file(td, fname, td->subjob_number, 1);
 					file_action = FIO_LOG_ADD_FILE;
 				}
-				continue;
 			} else if (!strcmp(act, "open")) {
 				fileno = get_fileno(td, fname);
 				file_action = FIO_LOG_OPEN_FILE;
@@ -475,7 +531,7 @@ static bool read_iolog2(struct thread_data *td)
 				continue;
 			}
 		} else {
-			log_err("bad iolog2: %s\n", p);
+			log_err("bad iolog%d: %s\n", td->io_log_version, p);
 			continue;
 		}
 
@@ -506,6 +562,8 @@ static bool read_iolog2(struct thread_data *td)
 		ipo = calloc(1, sizeof(*ipo));
 		init_ipo(ipo);
 		ipo->ddir = rw;
+		if (td->io_log_version == 3)
+			ipo->delay = delay;
 		if (rw == DDIR_WAIT) {
 			ipo->delay = offset;
 		} else {
@@ -650,18 +708,22 @@ static bool init_iolog_read(struct thread_data *td, char *fname)
 	}
 
 	/*
-	 * version 2 of the iolog stores a specific string as the
+	 * versions 2 and 3 of the iolog store a specific string as the
 	 * first line, check for that
 	 */
-	if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2))) {
-		free_release_files(td);
-		td->io_log_rfile = f;
-		return read_iolog2(td);
+	if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
+		td->io_log_version = 2;
+	else if (!strncmp(iolog_ver3, buffer, strlen(iolog_ver3)))
+		td->io_log_version = 3;
+	else {
+		log_err("fio: iolog version 1 is no longer supported\n");
+		fclose(f);
+		return false;
 	}
 
-	log_err("fio: iolog version 1 is no longer supported\n");
-	fclose(f);
-	return false;
+	free_release_files(td);
+	td->io_log_rfile = f;
+	return read_iolog(td);
 }
 
 /*
@@ -685,11 +747,12 @@ static bool init_iolog_write(struct thread_data *td)
 	td->iolog_f = f;
 	td->iolog_buf = malloc(8192);
 	setvbuf(f, td->iolog_buf, _IOFBF, 8192);
+	fio_gettime(&td->io_log_start_time, NULL);
 
 	/*
 	 * write our version line
 	 */
-	if (fprintf(f, "%s\n", iolog_ver2) < 0) {
+	if (fprintf(f, "%s\n", iolog_ver3) < 0) {
 		perror("iolog init\n");
 		return false;
 	}
diff --git a/iolog.h b/iolog.h
index a3986309..62cbd1b0 100644
--- a/iolog.h
+++ b/iolog.h
@@ -227,10 +227,8 @@ struct io_piece {
 	unsigned long len;
 	unsigned int flags;
 	enum fio_ddir ddir;
-	union {
-		unsigned long delay;
-		unsigned int file_action;
-	};
+	unsigned long delay;
+	unsigned int file_action;
 };
 
 /*
@@ -259,6 +257,8 @@ extern int iolog_compress_init(struct thread_data *, struct sk_out *);
 extern void iolog_compress_exit(struct thread_data *);
 extern size_t log_chunk_sizes(struct io_log *);
 extern int init_io_u_buffers(struct thread_data *);
+extern unsigned long long delay_since_ttime(const struct thread_data *,
+					     unsigned long long);
 
 #ifdef CONFIG_ZLIB
 extern int iolog_file_inflate(const char *);

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-07 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-07 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 06bbdc1cb857a11e6d1b7c089126397daca904fe:

  smalloc: fix ptr address in redzone error message (2022-04-05 11:47:35 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a3e48f483db27d20e02cbd81e3a8f18c6c5c50f5:

  Fio 3.30 (2022-04-06 17:10:00 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Fio 3.30

 FIO-VERSION-GEN | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index 60f7bb21..fa64f50f 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.29
+DEF_VER=fio-3.30
 
 LF='
 '

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-04-06 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-04-06 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 87933e32e356b15b85c6d9775d5e840994080a4f:

  Rename 'fallthrough' attribute to 'fio_fallthrough' (2022-03-30 17:31:36 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 06bbdc1cb857a11e6d1b7c089126397daca904fe:

  smalloc: fix ptr address in redzone error message (2022-04-05 11:47:35 -0600)

----------------------------------------------------------------
Vincent Fu (1):
      smalloc: fix ptr address in redzone error message

 smalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

---

Diff of recent changes:

diff --git a/smalloc.c b/smalloc.c
index fa00f0ee..23243054 100644
--- a/smalloc.c
+++ b/smalloc.c
@@ -283,13 +283,13 @@ static void sfree_check_redzone(struct block_hdr *hdr)
 	if (hdr->prered != SMALLOC_PRE_RED) {
 		log_err("smalloc pre redzone destroyed!\n"
 			" ptr=%p, prered=%x, expected %x\n",
-				hdr, hdr->prered, SMALLOC_PRE_RED);
+				hdr+1, hdr->prered, SMALLOC_PRE_RED);
 		assert(0);
 	}
 	if (*postred != SMALLOC_POST_RED) {
 		log_err("smalloc post redzone destroyed!\n"
 			"  ptr=%p, postred=%x, expected %x\n",
-				hdr, *postred, SMALLOC_POST_RED);
+				hdr+1, *postred, SMALLOC_POST_RED);
 		assert(0);
 	}
 }

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-31 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-31 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 5e644771eb91e91dd0fa32f4b51f90c44853a2b1:

  Merge branch 'status-interval-finished-jobs' of https://github.com/mmkayPL/fio (2022-03-29 06:30:44 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 87933e32e356b15b85c6d9775d5e840994080a4f:

  Rename 'fallthrough' attribute to 'fio_fallthrough' (2022-03-30 17:31:36 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Rename 'fallthrough' attribute to 'fio_fallthrough'

 compiler/compiler.h |  4 ++--
 crc/murmur3.c       |  4 ++--
 engines/http.c      |  2 +-
 hash.h              | 24 ++++++++++++------------
 init.c              |  2 +-
 io_u.c              | 10 +++++-----
 lib/lfsr.c          | 32 ++++++++++++++++----------------
 parse.c             |  4 ++--
 t/lfsr-test.c       |  6 +++---
 9 files changed, 44 insertions(+), 44 deletions(-)

---

Diff of recent changes:

diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3fd0822f..fefadeaa 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -72,9 +72,9 @@
 #endif
 
 #if __has_attribute(__fallthrough__)
-#define fallthrough	 __attribute__((__fallthrough__))
+#define fio_fallthrough	 __attribute__((__fallthrough__))
 #else
-#define fallthrough	do {} while (0)  /* fallthrough */
+#define fio_fallthrough	do {} while (0)  /* fallthrough */
 #endif
 
 #endif
diff --git a/crc/murmur3.c b/crc/murmur3.c
index ba408a9e..08660bc8 100644
--- a/crc/murmur3.c
+++ b/crc/murmur3.c
@@ -30,10 +30,10 @@ static uint32_t murmur3_tail(const uint8_t *data, const int nblocks,
 	switch (len & 3) {
 	case 3:
 		k1 ^= tail[2] << 16;
-		fallthrough;
+		fio_fallthrough;
 	case 2:
 		k1 ^= tail[1] << 8;
-		fallthrough;
+		fio_fallthrough;
 	case 1:
 		k1 ^= tail[0];
 		k1 *= c1;
diff --git a/engines/http.c b/engines/http.c
index 57d4967d..696febe1 100644
--- a/engines/http.c
+++ b/engines/http.c
@@ -297,7 +297,7 @@ static int _curl_trace(CURL *handle, curl_infotype type,
 	switch (type) {
 	case CURLINFO_TEXT:
 		fprintf(stderr, "== Info: %s", data);
-		fallthrough;
+		fio_fallthrough;
 	default:
 	case CURLINFO_SSL_DATA_OUT:
 	case CURLINFO_SSL_DATA_IN:
diff --git a/hash.h b/hash.h
index 2c04bc29..f7596a56 100644
--- a/hash.h
+++ b/hash.h
@@ -142,20 +142,20 @@ static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval)
 	/* Last block: affect all 32 bits of (c) */
 	/* All the case statements fall through */
 	switch (length) {
-	case 12: c += (uint32_t) k[11] << 24;	fallthrough;
-	case 11: c += (uint32_t) k[10] << 16;	fallthrough;
-	case 10: c += (uint32_t) k[9] << 8;	fallthrough;
-	case 9:  c += k[8];			fallthrough;
-	case 8:  b += (uint32_t) k[7] << 24;	fallthrough;
-	case 7:  b += (uint32_t) k[6] << 16;	fallthrough;
-	case 6:  b += (uint32_t) k[5] << 8;	fallthrough;
-	case 5:  b += k[4];			fallthrough;
-	case 4:  a += (uint32_t) k[3] << 24;	fallthrough;
-	case 3:  a += (uint32_t) k[2] << 16;	fallthrough;
-	case 2:  a += (uint32_t) k[1] << 8;	fallthrough;
+	case 12: c += (uint32_t) k[11] << 24;	fio_fallthrough;
+	case 11: c += (uint32_t) k[10] << 16;	fio_fallthrough;
+	case 10: c += (uint32_t) k[9] << 8;	fio_fallthrough;
+	case 9:  c += k[8];			fio_fallthrough;
+	case 8:  b += (uint32_t) k[7] << 24;	fio_fallthrough;
+	case 7:  b += (uint32_t) k[6] << 16;	fio_fallthrough;
+	case 6:  b += (uint32_t) k[5] << 8;	fio_fallthrough;
+	case 5:  b += k[4];			fio_fallthrough;
+	case 4:  a += (uint32_t) k[3] << 24;	fio_fallthrough;
+	case 3:  a += (uint32_t) k[2] << 16;	fio_fallthrough;
+	case 2:  a += (uint32_t) k[1] << 8;	fio_fallthrough;
 	case 1:  a += k[0];
 		 __jhash_final(a, b, c);
-		 fallthrough;
+		 fio_fallthrough;
 	case 0: /* Nothing left to add */
 		break;
 	}
diff --git a/init.c b/init.c
index b7f866e6..6f186051 100644
--- a/init.c
+++ b/init.c
@@ -2990,7 +2990,7 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
 			log_err("%s: unrecognized option '%s'\n", argv[0],
 							argv[optind - 1]);
 			show_closest_option(argv[optind - 1]);
-			fallthrough;
+			fio_fallthrough;
 		default:
 			do_exit++;
 			exit_val = 1;
diff --git a/io_u.c b/io_u.c
index 50197a4b..eec378dd 100644
--- a/io_u.c
+++ b/io_u.c
@@ -993,7 +993,7 @@ static void __io_u_mark_map(uint64_t *map, unsigned int nr)
 		break;
 	case 1 ... 4:
 		idx = 1;
-		fallthrough;
+		fio_fallthrough;
 	case 0:
 		break;
 	}
@@ -1035,7 +1035,7 @@ void io_u_mark_depth(struct thread_data *td, unsigned int nr)
 		break;
 	case 2 ... 3:
 		idx = 1;
-		fallthrough;
+		fio_fallthrough;
 	case 1:
 		break;
 	}
@@ -1076,7 +1076,7 @@ static void io_u_mark_lat_nsec(struct thread_data *td, unsigned long long nsec)
 		break;
 	case 2 ... 3:
 		idx = 1;
-		fallthrough;
+		fio_fallthrough;
 	case 0 ... 1:
 		break;
 	}
@@ -1118,7 +1118,7 @@ static void io_u_mark_lat_usec(struct thread_data *td, unsigned long long usec)
 		break;
 	case 2 ... 3:
 		idx = 1;
-		fallthrough;
+		fio_fallthrough;
 	case 0 ... 1:
 		break;
 	}
@@ -1166,7 +1166,7 @@ static void io_u_mark_lat_msec(struct thread_data *td, unsigned long long msec)
 		break;
 	case 2 ... 3:
 		idx = 1;
-		fallthrough;
+		fio_fallthrough;
 	case 0 ... 1:
 		break;
 	}
diff --git a/lib/lfsr.c b/lib/lfsr.c
index a32e850a..e86086c4 100644
--- a/lib/lfsr.c
+++ b/lib/lfsr.c
@@ -88,37 +88,37 @@ static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin)
 	 */
 	switch (spin) {
 		case 15: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case 14: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case 13: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case 12: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case 11: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case 10: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  9: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  8: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  7: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  6: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  5: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  4: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  3: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  2: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  1: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		case  0: __LFSR_NEXT(fl, fl->last_val);
-		fallthrough;
+		fio_fallthrough;
 		default: break;
 	}
 }
diff --git a/parse.c b/parse.c
index e0bee004..656a5025 100644
--- a/parse.c
+++ b/parse.c
@@ -601,7 +601,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 	}
 	case FIO_OPT_STR_VAL_TIME:
 		is_time = 1;
-		fallthrough;
+		fio_fallthrough;
 	case FIO_OPT_ULL:
 	case FIO_OPT_INT:
 	case FIO_OPT_STR_VAL:
@@ -980,7 +980,7 @@ store_option_value:
 	}
 	case FIO_OPT_DEPRECATED:
 		ret = 1;
-		fallthrough;
+		fio_fallthrough;
 	case FIO_OPT_SOFT_DEPRECATED:
 		log_info("Option %s is deprecated\n", o->name);
 		break;
diff --git a/t/lfsr-test.c b/t/lfsr-test.c
index 279e07f0..4b255e19 100644
--- a/t/lfsr-test.c
+++ b/t/lfsr-test.c
@@ -41,11 +41,11 @@ int main(int argc, char *argv[])
 	switch (argc) {
 		case 5: if (strncmp(argv[4], "verify", 7) == 0)
 				verify = 1;
-			fallthrough;
+			fio_fallthrough;
 		case 4: spin = atoi(argv[3]);
-			fallthrough;
+			fio_fallthrough;
 		case 3: seed = atol(argv[2]);
-			fallthrough;
+			fio_fallthrough;
 		case 2: numbers = strtol(argv[1], NULL, 16);
 				break;
 		default: usage();

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-30 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-30 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit a57d3fdce796f1bb516c74db95d016bb6db170c1:

  Merge branch 'master' of https://github.com/cccheng/fio (2022-03-28 06:43:56 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5e644771eb91e91dd0fa32f4b51f90c44853a2b1:

  Merge branch 'status-interval-finished-jobs' of https://github.com/mmkayPL/fio (2022-03-29 06:30:44 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'status-interval-finished-jobs' of https://github.com/mmkayPL/fio

Kozlowski Mateusz (1):
      Handle finished jobs when using status-interval

 stat.c | 6 ++++++
 1 file changed, 6 insertions(+)

---

Diff of recent changes:

diff --git a/stat.c b/stat.c
index 7947edb4..356083e2 100644
--- a/stat.c
+++ b/stat.c
@@ -2731,6 +2731,9 @@ int __show_running_run_stats(void)
 	fio_gettime(&ts, NULL);
 
 	for_each_td(td, i) {
+		if (td->runstate >= TD_EXITED)
+			continue;
+
 		td->update_rusage = 1;
 		for_each_rw_ddir(ddir) {
 			td->ts.io_bytes[ddir] = td->io_bytes[ddir];
@@ -2759,6 +2762,9 @@ int __show_running_run_stats(void)
 	__show_run_stats();
 
 	for_each_td(td, i) {
+		if (td->runstate >= TD_EXITED)
+			continue;
+
 		if (td_read(td) && td->ts.io_bytes[DDIR_READ])
 			td->ts.runtime[DDIR_READ] -= rt[i];
 		if (td_write(td) && td->ts.io_bytes[DDIR_WRITE])

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-29 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-29 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit e3de2e7fe2889942d46699e72ac06b96eab09e27:

  Merge branch 'github-1372' of https://github.com/vincentkfu/fio (2022-03-24 10:11:34 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a57d3fdce796f1bb516c74db95d016bb6db170c1:

  Merge branch 'master' of https://github.com/cccheng/fio (2022-03-28 06:43:56 -0600)

----------------------------------------------------------------
Chung-Chiang Cheng (1):
      Fix compile error of GCC 4

Jens Axboe (1):
      Merge branch 'master' of https://github.com/cccheng/fio

 compiler/compiler.h | 1 +
 1 file changed, 1 insertion(+)

---

Diff of recent changes:

diff --git a/compiler/compiler.h b/compiler/compiler.h
index 44fa87b9..3fd0822f 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -67,6 +67,7 @@
 #endif
 
 #ifndef __has_attribute
+#define __has_attribute(x) __GCC4_has_attribute_##x
 #define __GCC4_has_attribute___fallthrough__	0
 #endif
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-25 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-25 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit c822572d68e326384ce179b9484de0e4abf3d514:

  engines/null: use correct -include (2022-03-20 09:31:20 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to e3de2e7fe2889942d46699e72ac06b96eab09e27:

  Merge branch 'github-1372' of https://github.com/vincentkfu/fio (2022-03-24 10:11:34 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'github-1372' of https://github.com/vincentkfu/fio

Vincent Fu (1):
      io_u: produce bad offsets for some time_based jobs

 io_u.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/io_u.c b/io_u.c
index 806ceb77..50197a4b 100644
--- a/io_u.c
+++ b/io_u.c
@@ -355,7 +355,7 @@ static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
 	 * and invalidate the cache, if we need to.
 	 */
 	if (f->last_pos[ddir] >= f->io_size + get_start_offset(td, f) &&
-	    o->time_based) {
+	    o->time_based && o->nr_files == 1) {
 		f->last_pos[ddir] = f->file_offset;
 		loop_cache_invalidate(td, f);
 	}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-21 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-21 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 1953e1adb5a28ed21370e85991d7f5c3cdc699f3:

  Merge branch 'flags-fix' of https://github.com/albertofaria/fio (2022-03-15 17:21:41 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c822572d68e326384ce179b9484de0e4abf3d514:

  engines/null: use correct -include (2022-03-20 09:31:20 -0600)

----------------------------------------------------------------
Jens Axboe (3):
      engines/null: update external engine compilation
      Merge branch 'master' of https://github.com/jnoc/fio
      engines/null: use correct -include

Jonathon Carter (1):
      Added citation.cff for easy APA/BibTeX citation directly from the Github repository

 CITATION.cff   | 11 +++++++++++
 engines/null.c |  7 ++++---
 2 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 CITATION.cff

---

Diff of recent changes:

diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..3df315e5
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: 1.2.0
+preferred-citation:
+  type: software
+  authors:
+  - family-names: "Axboe"
+    given-names: "Jens"
+    email: axboe@kernel.dk
+  title: "Flexible I/O Tester"
+  year: 2022
+  url: "https://github.com/axboe/fio"
+licence: GNU GPL v2.0
diff --git a/engines/null.c b/engines/null.c
index 4cc0102b..8dcd1b21 100644
--- a/engines/null.c
+++ b/engines/null.c
@@ -6,7 +6,8 @@
  *
  * It also can act as external C++ engine - compiled with:
  *
- * g++ -O2 -g -shared -rdynamic -fPIC -o cpp_null null.c -DFIO_EXTERNAL_ENGINE
+ * g++ -O2 -g -shared -rdynamic -fPIC -o cpp_null null.c \
+ *	-include ../config-host.h -DFIO_EXTERNAL_ENGINE
  *
  * to test it execute:
  *
@@ -201,7 +202,7 @@ struct NullData {
 		return null_commit(td, impl_);
 	}
 
-	int fio_null_queue(struct thread_data *td, struct io_u *io_u)
+	fio_q_status fio_null_queue(struct thread_data *td, struct io_u *io_u)
 	{
 		return null_queue(td, impl_, io_u);
 	}
@@ -233,7 +234,7 @@ static int fio_null_commit(struct thread_data *td)
 	return NullData::get(td)->fio_null_commit(td);
 }
 
-static int fio_null_queue(struct thread_data *td, struct io_u *io_u)
+static fio_q_status fio_null_queue(struct thread_data *td, struct io_u *io_u)
 {
 	return NullData::get(td)->fio_null_queue(td, io_u);
 }

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-16 12:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-16 12:00 UTC (permalink / raw)
  To: fio

The following changes since commit 1fe261a24794f60bf374cd1852e09ec56997a20a:

  t/dedupe: ensure that 'ret' is initialized (2022-03-11 06:15:53 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1953e1adb5a28ed21370e85991d7f5c3cdc699f3:

  Merge branch 'flags-fix' of https://github.com/albertofaria/fio (2022-03-15 17:21:41 -0600)

----------------------------------------------------------------
Alberto Faria (1):
      Properly encode engine flags in thread_data::flags

Jens Axboe (1):
      Merge branch 'flags-fix' of https://github.com/albertofaria/fio

 fio.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/fio.h b/fio.h
index c314f0a8..776fb51f 100644
--- a/fio.h
+++ b/fio.h
@@ -184,7 +184,7 @@ struct zone_split_index {
  */
 struct thread_data {
 	struct flist_head opt_list;
-	unsigned long flags;
+	unsigned long long flags;
 	struct thread_options o;
 	void *eo;
 	pthread_t thread;
@@ -681,12 +681,12 @@ enum {
 };
 
 #define TD_ENG_FLAG_SHIFT	18
-#define TD_ENG_FLAG_MASK	((1U << 18) - 1)
+#define TD_ENG_FLAG_MASK	((1ULL << 18) - 1)
 
 static inline void td_set_ioengine_flags(struct thread_data *td)
 {
 	td->flags = (~(TD_ENG_FLAG_MASK << TD_ENG_FLAG_SHIFT) & td->flags) |
-		    (td->io_ops->flags << TD_ENG_FLAG_SHIFT);
+		    ((unsigned long long)td->io_ops->flags << TD_ENG_FLAG_SHIFT);
 }
 
 static inline bool td_ioengine_flagged(struct thread_data *td,

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-12 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-12 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 16b1e24562347d371d6d62e0bb9a03ad4e2a8a96:

  t/dedupe: handle errors more gracefully (2022-03-11 05:09:20 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1fe261a24794f60bf374cd1852e09ec56997a20a:

  t/dedupe: ensure that 'ret' is initialized (2022-03-11 06:15:53 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      t/dedupe: ensure that 'ret' is initialized

 t/dedupe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/t/dedupe.c b/t/dedupe.c
index 561aa08d..d21e96f4 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -280,7 +280,7 @@ static int insert_chunks(struct item *items, unsigned int nitems,
 			 uint64_t *ndupes, uint64_t *unique_capacity,
 			 struct zlib_ctrl *zc)
 {
-	int i, ret;
+	int i, ret = 0;
 
 	fio_sem_down(rb_lock);
 

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-11 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-11 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit df0ab55ff9e28f4b85c199e207aec904f8a76440:

  Merge branch 'master' of https://github.com/dpronin/fio (2022-03-09 06:20:31 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 16b1e24562347d371d6d62e0bb9a03ad4e2a8a96:

  t/dedupe: handle errors more gracefully (2022-03-11 05:09:20 -0700)

----------------------------------------------------------------
Denis Pronin (4):
      configure script refactoring
      improvements in dup_files function
      fixed memory leak detected by ASAN
      ASAN enabling when configuring

Jens Axboe (7):
      Merge branch 'master' of https://github.com/dpronin/fio
      Merge branch 'refactoring/configure' of https://github.com/dpronin/fio
      Merge branch 'improvement/prevent-sigsegv-when-dup-files' of https://github.com/dpronin/fio
      Merge branch 'improvement/enable-asan' of https://github.com/dpronin/fio
      t/io_uring: only enable sync if we have preadv2
      Merge branch 'fuzz-cleanup' of https://github.com/vincentkfu/fio
      t/dedupe: handle errors more gracefully

Vincent Fu (1):
      fuzz: avoid building t/fuzz/parse_ini by default

 Makefile     |  8 +++++++-
 backend.c    |  6 ++++++
 configure    | 14 ++++++++++----
 filesetup.c  |  3 ++-
 t/dedupe.c   | 57 +++++++++++++++++++++++++++++++++++----------------------
 t/io_uring.c | 13 +++++++++++++
 6 files changed, 73 insertions(+), 28 deletions(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 6ffd3d13..e670c1f2 100644
--- a/Makefile
+++ b/Makefile
@@ -385,14 +385,16 @@ T_MEMLOCK_PROGS = t/memlock
 T_TT_OBJS = t/time-test.o
 T_TT_PROGS = t/time-test
 
+ifneq (,$(findstring -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION,$(CFLAGS)))
 T_FUZZ_OBJS = t/fuzz/fuzz_parseini.o
 T_FUZZ_OBJS += $(OBJS)
 ifdef CONFIG_ARITHMETIC
 T_FUZZ_OBJS += lex.yy.o y.tab.o
 endif
+# For proper fio code teardown CFLAGS needs to include -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 # in case there is no fuzz driver defined by environment variable LIB_FUZZING_ENGINE, use a simple one
 # For instance, with compiler clang, address sanitizer and libFuzzer as a fuzzing engine, you should define
-# export CFLAGS="-fsanitize=address,fuzzer-no-link"
+# export CFLAGS="-fsanitize=address,fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"
 # export LIB_FUZZING_ENGINE="-fsanitize=address"
 # export CC=clang
 # before running configure && make
@@ -401,6 +403,10 @@ ifndef LIB_FUZZING_ENGINE
 T_FUZZ_OBJS += t/fuzz/onefile.o
 endif
 T_FUZZ_PROGS = t/fuzz/fuzz_parseini
+else	# CFLAGS includes -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+T_FUZZ_OBJS =
+T_FUZZ_PROGS =
+endif
 
 T_OBJS = $(T_SMALLOC_OBJS)
 T_OBJS += $(T_IEEE_OBJS)
diff --git a/backend.c b/backend.c
index cd7f4e5f..001b2b96 100644
--- a/backend.c
+++ b/backend.c
@@ -2432,7 +2432,10 @@ reap:
 							strerror(ret));
 			} else {
 				pid_t pid;
+				struct fio_file **files;
 				dprint(FD_PROCESS, "will fork\n");
+				files = td->files;
+				read_barrier();
 				pid = fork();
 				if (!pid) {
 					int ret;
@@ -2441,6 +2444,9 @@ reap:
 					_exit(ret);
 				} else if (i == fio_debug_jobno)
 					*fio_debug_jobp = pid;
+				// freeing previously allocated memory for files
+				// this memory freed MUST NOT be shared between processes, only the pointer itself may be shared within TD
+				free(files);
 				free(fd);
 				fd = NULL;
 			}
diff --git a/configure b/configure
index 67e5d535..d327d2ca 100755
--- a/configure
+++ b/configure
@@ -248,6 +248,8 @@ for opt do
   ;;
   --disable-dfs) dfs="no"
   ;;
+  --enable-asan) asan="yes"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -290,9 +292,10 @@ if test "$show_help" = "yes" ; then
   echo "--enable-libiscsi       Enable iscsi support"
   echo "--enable-libnbd         Enable libnbd (NBD engine) support"
   echo "--disable-libzbc        Disable libzbc even if found"
-  echo "--disable-tcmalloc	Disable tcmalloc support"
-  echo "--dynamic-libengines	Lib-based ioengines as dynamic libraries"
-  echo "--disable-dfs		Disable DAOS File System support even if found"
+  echo "--disable-tcmalloc      Disable tcmalloc support"
+  echo "--dynamic-libengines    Lib-based ioengines as dynamic libraries"
+  echo "--disable-dfs           Disable DAOS File System support even if found"
+  echo "--enable-asan           Enable address sanitizer"
   exit $exit_val
 fi
 
@@ -3196,7 +3199,10 @@ fi
 if test "$fcntl_sync" = "yes" ; then
   output_sym "CONFIG_FCNTL_SYNC"
 fi
-
+if test "$asan" = "yes"; then
+  CFLAGS="$CFLAGS -fsanitize=address"
+  LDFLAGS="$LDFLAGS -fsanitize=address"
+fi
 print_config "Lib-based ioengines dynamic" "$dynamic_engines"
 cat > $TMPC << EOF
 int main(int argc, char **argv)
diff --git a/filesetup.c b/filesetup.c
index 7c32d0af..ab6c488b 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -2031,11 +2031,12 @@ void dup_files(struct thread_data *td, struct thread_data *org)
 	if (!org->files)
 		return;
 
-	td->files = malloc(org->files_index * sizeof(f));
+	td->files = calloc(org->files_index, sizeof(f));
 
 	if (td->o.file_lock_mode != FILE_LOCK_NONE)
 		td->file_locks = malloc(org->files_index);
 
+	assert(org->files_index >= org->o.nr_files);
 	for_each_file(org, f, i) {
 		struct fio_file *__f;
 
diff --git a/t/dedupe.c b/t/dedupe.c
index 109ea1af..561aa08d 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -143,15 +143,15 @@ static int read_block(int fd, void *buf, off_t offset)
 	return __read_block(fd, buf, offset, blocksize);
 }
 
-static void account_unique_capacity(uint64_t offset, uint64_t *unique_capacity,
-				    struct zlib_ctrl *zc)
+static int account_unique_capacity(uint64_t offset, uint64_t *unique_capacity,
+				   struct zlib_ctrl *zc)
 {
 	z_stream *stream = &zc->stream;
 	unsigned int compressed_len;
 	int ret;
 
 	if (read_block(file.fd, zc->buf_in, offset))
-		return;
+		return 1;
 
 	stream->next_in = zc->buf_in;
 	stream->avail_in = blocksize;
@@ -159,7 +159,8 @@ static void account_unique_capacity(uint64_t offset, uint64_t *unique_capacity,
 	stream->next_out = zc->buf_out;
 
 	ret = deflate(stream, Z_FINISH);
-	assert(ret != Z_STREAM_ERROR);
+	if (ret == Z_STREAM_ERROR)
+		return 1;
 	compressed_len = blocksize - stream->avail_out;
 
 	if (dump_output)
@@ -169,6 +170,7 @@ static void account_unique_capacity(uint64_t offset, uint64_t *unique_capacity,
 
 	*unique_capacity += compressed_len;
 	deflateReset(stream);
+	return 0;
 }
 
 static void add_item(struct chunk *c, struct item *i)
@@ -225,12 +227,12 @@ static struct chunk *alloc_chunk(void)
 	return c;
 }
 
-static void insert_chunk(struct item *i, uint64_t *unique_capacity,
-			 struct zlib_ctrl *zc)
+static int insert_chunk(struct item *i, uint64_t *unique_capacity,
+			struct zlib_ctrl *zc)
 {
 	struct fio_rb_node **p, *parent;
 	struct chunk *c;
-	int diff;
+	int ret, diff;
 
 	p = &rb_root.rb_node;
 	parent = NULL;
@@ -244,8 +246,6 @@ static void insert_chunk(struct item *i, uint64_t *unique_capacity,
 		} else if (diff > 0) {
 			p = &(*p)->rb_right;
 		} else {
-			int ret;
-
 			if (!collision_check)
 				goto add;
 
@@ -266,17 +266,21 @@ static void insert_chunk(struct item *i, uint64_t *unique_capacity,
 	memcpy(c->hash, i->hash, sizeof(i->hash));
 	rb_link_node(&c->rb_node, parent, p);
 	rb_insert_color(&c->rb_node, &rb_root);
-	if (compression)
-		account_unique_capacity(i->offset, unique_capacity, zc);
+	if (compression) {
+		ret = account_unique_capacity(i->offset, unique_capacity, zc);
+		if (ret)
+			return ret;
+	}
 add:
 	add_item(c, i);
+	return 0;
 }
 
-static void insert_chunks(struct item *items, unsigned int nitems,
-			  uint64_t *ndupes, uint64_t *unique_capacity,
-			  struct zlib_ctrl *zc)
+static int insert_chunks(struct item *items, unsigned int nitems,
+			 uint64_t *ndupes, uint64_t *unique_capacity,
+			 struct zlib_ctrl *zc)
 {
-	int i;
+	int i, ret;
 
 	fio_sem_down(rb_lock);
 
@@ -288,11 +292,15 @@ static void insert_chunks(struct item *items, unsigned int nitems,
 			s = sizeof(items[i].hash) / sizeof(uint32_t);
 			r = bloom_set(bloom, items[i].hash, s);
 			*ndupes += r;
-		} else
-			insert_chunk(&items[i], unique_capacity, zc);
+		} else {
+			ret = insert_chunk(&items[i], unique_capacity, zc);
+			if (ret)
+				break;
+		}
 	}
 
 	fio_sem_up(rb_lock);
+	return ret;
 }
 
 static void crc_buf(void *buf, uint32_t *hash)
@@ -320,6 +328,7 @@ static int do_work(struct worker_thread *thread, void *buf)
 	uint64_t ndupes = 0;
 	uint64_t unique_capacity = 0;
 	struct item *items;
+	int ret;
 
 	offset = thread->cur_offset;
 
@@ -339,13 +348,17 @@ static int do_work(struct worker_thread *thread, void *buf)
 		nitems++;
 	}
 
-	insert_chunks(items, nitems, &ndupes, &unique_capacity, &thread->zc);
+	ret = insert_chunks(items, nitems, &ndupes, &unique_capacity, &thread->zc);
 
 	free(items);
-	thread->items += nitems;
-	thread->dupes += ndupes;
-	thread->unique_capacity += unique_capacity;
-	return 0;
+	if (!ret) {
+		thread->items += nitems;
+		thread->dupes += ndupes;
+		thread->unique_capacity += unique_capacity;
+		return 0;
+	}
+
+	return ret;
 }
 
 static void thread_init_zlib_control(struct worker_thread *thread)
diff --git a/t/io_uring.c b/t/io_uring.c
index 157eea9e..10035912 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -939,6 +939,7 @@ submit:
 	return NULL;
 }
 
+#ifdef CONFIG_PWRITEV2
 static void *submitter_sync_fn(void *data)
 {
 	struct submitter *s = data;
@@ -1004,6 +1005,13 @@ static void *submitter_sync_fn(void *data)
 	finish = 1;
 	return NULL;
 }
+#else
+static void *submitter_sync_fn(void *data)
+{
+	finish = 1;
+	return NULL;
+}
+#endif
 
 static struct submitter *get_submitter(int offset)
 {
@@ -1346,7 +1354,12 @@ int main(int argc, char *argv[])
 			register_ring = !!atoi(optarg);
 			break;
 		case 'S':
+#ifdef CONFIG_PWRITEV2
 			use_sync = !!atoi(optarg);
+#else
+			fprintf(stderr, "preadv2 not supported\n");
+			exit(1);
+#endif
 			break;
 		case 'h':
 		case '?':

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-10 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-10 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit a24ef2702e2c1b948df37080eb3f18cca60d414b:

  Merge branch 'master' of https://github.com/dpronin/fio (2022-03-08 16:42:37 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to df0ab55ff9e28f4b85c199e207aec904f8a76440:

  Merge branch 'master' of https://github.com/dpronin/fio (2022-03-09 06:20:31 -0700)

----------------------------------------------------------------
Denis Pronin (3):
      - freeing job_sections array of strings upon freeing each its item in init.c
      - fixed memory leak, which is happening when parsing options, claimed by ASAN
      - fixed memory leak in parent process detected by ASAN when forking and not freeing memory in the parent process allocated for fork_data

Jens Axboe (3):
      Merge branch 'fix/asan-memleak' of https://github.com/dpronin/fio
      Merge branch 'fix/asan-memleak-forkdata' of https://github.com/dpronin/fio
      Merge branch 'master' of https://github.com/dpronin/fio

 backend.c | 2 ++
 init.c    | 4 ++++
 parse.c   | 2 ++
 3 files changed, 8 insertions(+)

---

Diff of recent changes:

diff --git a/backend.c b/backend.c
index a21dfef6..cd7f4e5f 100644
--- a/backend.c
+++ b/backend.c
@@ -2441,6 +2441,8 @@ reap:
 					_exit(ret);
 				} else if (i == fio_debug_jobno)
 					*fio_debug_jobp = pid;
+				free(fd);
+				fd = NULL;
 			}
 			dprint(FD_MUTEX, "wait on startup_sem\n");
 			if (fio_sem_down_timeout(startup_sem, 10000)) {
diff --git a/init.c b/init.c
index 81c30f8c..b7f866e6 100644
--- a/init.c
+++ b/init.c
@@ -2185,6 +2185,10 @@ static int __parse_jobs_ini(struct thread_data *td,
 		i++;
 	}
 
+	free(job_sections);
+	job_sections = NULL;
+	nr_job_sections = 0;
+
 	free(opts);
 out:
 	free(string);
diff --git a/parse.c b/parse.c
index d086ee48..e0bee004 100644
--- a/parse.c
+++ b/parse.c
@@ -817,6 +817,8 @@ store_option_value:
 
 		if (o->off1) {
 			cp = td_var(data, o, o->off1);
+			if (*cp)
+				free(*cp);
 			*cp = strdup(ptr);
 			if (strlen(ptr) > o->maxlen - 1) {
 				log_err("value exceeds max length of %d\n",

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-09 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-09 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit dc44588f2e445edd7a4ca7dc9bf05bb3b4b2789e:

  Makefile: get rid of fortify source (2022-03-07 09:16:39 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a24ef2702e2c1b948df37080eb3f18cca60d414b:

  Merge branch 'master' of https://github.com/dpronin/fio (2022-03-08 16:42:37 -0700)

----------------------------------------------------------------
Denis Pronin (1):
      - fixed typo in configure script

Jens Axboe (1):
      Merge branch 'master' of https://github.com/dpronin/fio

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index be4605f9..67e5d535 100755
--- a/configure
+++ b/configure
@@ -2098,7 +2098,7 @@ if test "$libhdfs" = "yes" ; then
     hdfs_conf_error=1
   fi
   if test "$FIO_LIBHDFS_INCLUDE" = "" ; then
-    echo "configure: FIO_LIBHDFS_INCLUDE should be defined to libhdfs inlude path"
+    echo "configure: FIO_LIBHDFS_INCLUDE should be defined to libhdfs include path"
     hdfs_conf_error=1
   fi
   if test "$FIO_LIBHDFS_LIB" = "" ; then

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-03-08 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-03-08 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit c3773c171dffb79f771d213d94249cefc4b9b6de:

  windowsaio: open file for write if we have syncs (2022-02-26 10:43:20 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to dc44588f2e445edd7a4ca7dc9bf05bb3b4b2789e:

  Makefile: get rid of fortify source (2022-03-07 09:16:39 -0700)

----------------------------------------------------------------
Jens Axboe (7):
      t/io_uring: change map buffers registration opcode
      t/io_uring: change fatal map buffers condition with multiple files
      io_uring.h: sync with 5.18 kernel bits
      t/io_uring: add support for registering the ring fd
      t/io_uring: support using preadv2
      t/io_uring: add missing CR
      Makefile: get rid of fortify source

 Makefile            |   2 +-
 os/linux/io_uring.h |  17 ++++--
 t/io_uring.c        | 148 ++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 147 insertions(+), 20 deletions(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 0ab4f82c..6ffd3d13 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ PROGS	= fio
 SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/hist/fiologparser_hist.py tools/hist/fio-histo-log-pctiles.py tools/fio_jsonplus_clat2csv)
 
 ifndef CONFIG_FIO_NO_OPT
-  FIO_CFLAGS += -O3 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+  FIO_CFLAGS += -O3
 endif
 ifdef CONFIG_BUILD_NATIVE
   FIO_CFLAGS += -march=native
diff --git a/os/linux/io_uring.h b/os/linux/io_uring.h
index c45b5e9a..42b2fe84 100644
--- a/os/linux/io_uring.h
+++ b/os/linux/io_uring.h
@@ -70,6 +70,7 @@ enum {
 	IOSQE_IO_HARDLINK_BIT,
 	IOSQE_ASYNC_BIT,
 	IOSQE_BUFFER_SELECT_BIT,
+	IOSQE_CQE_SKIP_SUCCESS_BIT,
 };
 
 /*
@@ -87,6 +88,8 @@ enum {
 #define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
 /* select buffer from sqe->buf_group */
 #define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
+/* don't post CQE if request succeeded */
+#define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
 
 /*
  * io_uring_setup() flags
@@ -254,10 +257,11 @@ struct io_cqring_offsets {
 /*
  * io_uring_enter(2) flags
  */
-#define IORING_ENTER_GETEVENTS	(1U << 0)
-#define IORING_ENTER_SQ_WAKEUP	(1U << 1)
-#define IORING_ENTER_SQ_WAIT	(1U << 2)
-#define IORING_ENTER_EXT_ARG	(1U << 3)
+#define IORING_ENTER_GETEVENTS		(1U << 0)
+#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
+#define IORING_ENTER_SQ_WAIT		(1U << 2)
+#define IORING_ENTER_EXT_ARG		(1U << 3)
+#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -289,6 +293,7 @@ struct io_uring_params {
 #define IORING_FEAT_EXT_ARG		(1U << 8)
 #define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
 #define IORING_FEAT_RSRC_TAGS		(1U << 10)
+#define IORING_FEAT_CQE_SKIP		(1U << 11)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -321,6 +326,10 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
+	/* register/unregister io_uring fd with the ring */
+	IORING_REGISTER_RING_FDS		= 20,
+	IORING_UNREGISTER_RING_FDS		= 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
diff --git a/t/io_uring.c b/t/io_uring.c
index b8fcffe8..157eea9e 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -76,6 +76,7 @@ struct file {
 struct submitter {
 	pthread_t thread;
 	int ring_fd;
+	int enter_ring_fd;
 	int index;
 	struct io_sq_ring sq_ring;
 	struct io_uring_sqe *sqes;
@@ -127,6 +128,8 @@ static int stats = 0;		/* generate IO stats */
 static int aio = 0;		/* use libaio */
 static int runtime = 0;		/* runtime */
 static int random_io = 1;	/* random or sequential IO */
+static int register_ring = 1;	/* register ring */
+static int use_sync = 0;	/* use preadv2 */
 
 static unsigned long tsc_rate;
 
@@ -139,7 +142,7 @@ static float plist[] = { 1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0,
 static int plist_len = 17;
 
 #ifndef IORING_REGISTER_MAP_BUFFERS
-#define IORING_REGISTER_MAP_BUFFERS	20
+#define IORING_REGISTER_MAP_BUFFERS	22
 struct io_uring_map_buffers {
 	__s32	fd;
 	__u32	buf_start;
@@ -349,10 +352,8 @@ static int io_uring_map_buffers(struct submitter *s)
 
 	if (do_nop)
 		return 0;
-	if (s->nr_files > 1) {
-		fprintf(stderr, "Can't map buffers with multiple files\n");
-		return -1;
-	}
+	if (s->nr_files > 1)
+		fprintf(stdout, "Mapping buffers may not work with multiple files\n");
 
 	return syscall(__NR_io_uring_register, s->ring_fd,
 			IORING_REGISTER_MAP_BUFFERS, &map, 1);
@@ -422,12 +423,14 @@ out:
 static int io_uring_enter(struct submitter *s, unsigned int to_submit,
 			  unsigned int min_complete, unsigned int flags)
 {
+	if (register_ring)
+		flags |= IORING_ENTER_REGISTERED_RING;
 #ifdef FIO_ARCH_HAS_SYSCALL
-	return __do_syscall6(__NR_io_uring_enter, s->ring_fd, to_submit,
+	return __do_syscall6(__NR_io_uring_enter, s->enter_ring_fd, to_submit,
 				min_complete, flags, NULL, 0);
 #else
-	return syscall(__NR_io_uring_enter, s->ring_fd, to_submit, min_complete,
-			flags, NULL, 0);
+	return syscall(__NR_io_uring_enter, s->enter_ring_fd, to_submit,
+			min_complete, flags, NULL, 0);
 #endif
 }
 
@@ -795,6 +798,34 @@ static void *submitter_aio_fn(void *data)
 }
 #endif
 
+static void io_uring_unregister_ring(struct submitter *s)
+{
+	struct io_uring_rsrc_update up = {
+		.offset	= s->enter_ring_fd,
+	};
+
+	syscall(__NR_io_uring_register, s->ring_fd, IORING_UNREGISTER_RING_FDS,
+		&up, 1);
+}
+
+static int io_uring_register_ring(struct submitter *s)
+{
+	struct io_uring_rsrc_update up = {
+		.data	= s->ring_fd,
+		.offset	= -1U,
+	};
+	int ret;
+
+	ret = syscall(__NR_io_uring_register, s->ring_fd,
+			IORING_REGISTER_RING_FDS, &up, 1);
+	if (ret == 1) {
+		s->enter_ring_fd = up.offset;
+		return 0;
+	}
+	register_ring = 0;
+	return -1;
+}
+
 static void *submitter_uring_fn(void *data)
 {
 	struct submitter *s = data;
@@ -806,6 +837,9 @@ static void *submitter_uring_fn(void *data)
 	submitter_init(s);
 #endif
 
+	if (register_ring)
+		io_uring_register_ring(s);
+
 	prepped = 0;
 	do {
 		int to_wait, to_submit, this_reap, to_prep;
@@ -898,6 +932,75 @@ submit:
 		}
 	} while (!s->finish);
 
+	if (register_ring)
+		io_uring_unregister_ring(s);
+
+	finish = 1;
+	return NULL;
+}
+
+static void *submitter_sync_fn(void *data)
+{
+	struct submitter *s = data;
+	int ret;
+
+	submitter_init(s);
+
+	do {
+		uint64_t offset;
+		struct file *f;
+		long r;
+
+		if (s->nr_files == 1) {
+			f = &s->files[0];
+		} else {
+			f = &s->files[s->cur_file];
+			if (f->pending_ios >= file_depth(s)) {
+				s->cur_file++;
+				if (s->cur_file == s->nr_files)
+					s->cur_file = 0;
+				f = &s->files[s->cur_file];
+			}
+		}
+		f->pending_ios++;
+
+		if (random_io) {
+			r = __rand64(&s->rand_state);
+			offset = (r % (f->max_blocks - 1)) * bs;
+		} else {
+			offset = f->cur_off;
+			f->cur_off += bs;
+			if (f->cur_off + bs > f->max_size)
+				f->cur_off = 0;
+		}
+
+#ifdef ARCH_HAVE_CPU_CLOCK
+		if (stats)
+			s->clock_batch[s->clock_index] = get_cpu_clock();
+#endif
+
+		s->inflight++;
+		s->calls++;
+
+		if (polled)
+			ret = preadv2(f->real_fd, &s->iovecs[0], 1, offset, RWF_HIPRI);
+		else
+			ret = preadv2(f->real_fd, &s->iovecs[0], 1, offset, 0);
+
+		if (ret < 0) {
+			perror("preadv2");
+			break;
+		} else if (ret != bs) {
+			break;
+		}
+
+		s->done++;
+		s->inflight--;
+		f->pending_ios--;
+		if (stats)
+			add_stat(s, s->clock_index, 1);
+	} while (!s->finish);
+
 	finish = 1;
 	return NULL;
 }
@@ -1000,7 +1103,7 @@ static int setup_ring(struct submitter *s)
 		perror("io_uring_setup");
 		return 1;
 	}
-	s->ring_fd = fd;
+	s->ring_fd = s->enter_ring_fd = fd;
 
 	io_uring_probe(fd);
 
@@ -1105,10 +1208,13 @@ static void usage(char *argv, int status)
 		" -T <int>  : TSC rate in HZ\n"
 		" -r <int>  : Runtime in seconds, default %s\n"
 		" -R <bool> : Use random IO, default %d\n"
-		" -a <bool> : Use legacy aio, default %d\n",
+		" -a <bool> : Use legacy aio, default %d\n"
+		" -S <bool> : Use sync IO (preadv2), default %d\n"
+		" -X <bool> : Use registered ring %d\n",
 		argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
 		fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
-		stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio);
+		stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio,
+		use_sync, register_ring);
 	exit(status);
 }
 
@@ -1169,7 +1275,7 @@ int main(int argc, char *argv[])
 	if (!do_nop && argc < 2)
 		usage(argv[0], 1);
 
-	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:h?")) != -1) {
+	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:h?")) != -1) {
 		switch (opt) {
 		case 'a':
 			aio = !!atoi(optarg);
@@ -1236,6 +1342,12 @@ int main(int argc, char *argv[])
 		case 'R':
 			random_io = !!atoi(optarg);
 			break;
+		case 'X':
+			register_ring = !!atoi(optarg);
+			break;
+		case 'S':
+			use_sync = !!atoi(optarg);
+			break;
 		case 'h':
 		case '?':
 		default:
@@ -1346,7 +1458,9 @@ int main(int argc, char *argv[])
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
 
-		if (!aio)
+		if (use_sync)
+			continue;
+		else if (!aio)
 			err = setup_ring(s);
 		else
 			err = setup_aio(s);
@@ -1357,14 +1471,18 @@ int main(int argc, char *argv[])
 	}
 	s = get_submitter(0);
 	printf("polled=%d, fixedbufs=%d/%d, register_files=%d, buffered=%d, QD=%d\n", polled, fixedbufs, dma_map, register_files, buffered, depth);
-	if (!aio)
+	if (use_sync)
+		printf("Engine=preadv2\n");
+	else if (!aio)
 		printf("Engine=io_uring, sq_ring=%d, cq_ring=%d\n", *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
 	else
 		printf("Engine=aio\n");
 
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
-		if (!aio)
+		if (use_sync)
+			pthread_create(&s->thread, NULL, submitter_sync_fn, s);
+		else if (!aio)
 			pthread_create(&s->thread, NULL, submitter_uring_fn, s);
 #ifdef CONFIG_LIBAIO
 		else

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-27 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-27 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit cf2511565f40be1b78b3fc1194e823baf305f0a0:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-02-24 12:40:19 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c3773c171dffb79f771d213d94249cefc4b9b6de:

  windowsaio: open file for write if we have syncs (2022-02-26 10:43:20 -0700)

----------------------------------------------------------------
Jens Axboe (2):
      Add TD_F_SYNCS thread flag
      windowsaio: open file for write if we have syncs

 blktrace.c           | 4 ++++
 engines/windowsaio.c | 2 +-
 fio.h                | 6 ++++--
 ioengines.h          | 2 +-
 iolog.c              | 9 +++++++--
 5 files changed, 17 insertions(+), 6 deletions(-)

---

Diff of recent changes:

diff --git a/blktrace.c b/blktrace.c
index e1804765..ead60130 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -297,6 +297,10 @@ static bool handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
 
 	ios[DDIR_SYNC]++;
 	dprint(FD_BLKTRACE, "store flush delay=%lu\n", ipo->delay);
+
+	if (!(td->flags & TD_F_SYNCS))
+		td->flags |= TD_F_SYNCS;
+
 	queue_io_piece(td, ipo);
 	return true;
 }
diff --git a/engines/windowsaio.c b/engines/windowsaio.c
index d82c8053..6681f8bb 100644
--- a/engines/windowsaio.c
+++ b/engines/windowsaio.c
@@ -248,7 +248,7 @@ static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
 		log_err("fio: unknown fadvise type %d\n", td->o.fadvise_hint);
 	}
 
-	if (!td_write(td) || read_only)
+	if ((!td_write(td) && !(td->flags & TD_F_SYNCS)) || read_only)
 		access = GENERIC_READ;
 	else
 		access = (GENERIC_READ | GENERIC_WRITE);
diff --git a/fio.h b/fio.h
index 88df117d..c314f0a8 100644
--- a/fio.h
+++ b/fio.h
@@ -97,6 +97,7 @@ enum {
 	__TD_F_MMAP_KEEP,
 	__TD_F_DIRS_CREATED,
 	__TD_F_CHECK_RATE,
+	__TD_F_SYNCS,
 	__TD_F_LAST,		/* not a real bit, keep last */
 };
 
@@ -118,6 +119,7 @@ enum {
 	TD_F_MMAP_KEEP		= 1U << __TD_F_MMAP_KEEP,
 	TD_F_DIRS_CREATED	= 1U << __TD_F_DIRS_CREATED,
 	TD_F_CHECK_RATE		= 1U << __TD_F_CHECK_RATE,
+	TD_F_SYNCS		= 1U << __TD_F_SYNCS,
 };
 
 enum {
@@ -678,8 +680,8 @@ enum {
 	TD_NR,
 };
 
-#define TD_ENG_FLAG_SHIFT	17
-#define TD_ENG_FLAG_MASK	((1U << 17) - 1)
+#define TD_ENG_FLAG_SHIFT	18
+#define TD_ENG_FLAG_MASK	((1U << 18) - 1)
 
 static inline void td_set_ioengine_flags(struct thread_data *td)
 {
diff --git a/ioengines.h b/ioengines.h
index b3f755b4..acdb0071 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -8,7 +8,7 @@
 #include "io_u.h"
 #include "zbd_types.h"
 
-#define FIO_IOOPS_VERSION	30
+#define FIO_IOOPS_VERSION	31
 
 #ifndef CONFIG_DYNAMIC_ENGINES
 #define FIO_STATIC	static
diff --git a/iolog.c b/iolog.c
index a2cf0c1c..724ec1fe 100644
--- a/iolog.c
+++ b/iolog.c
@@ -402,6 +402,7 @@ static bool read_iolog2(struct thread_data *td)
 	enum fio_ddir rw;
 	bool realloc = false;
 	int64_t items_to_fetch = 0;
+	int syncs;
 
 	if (td->o.read_iolog_chunked) {
 		items_to_fetch = iolog_items_to_fetch(td);
@@ -417,7 +418,7 @@ static bool read_iolog2(struct thread_data *td)
 	rfname = fname = malloc(256+16);
 	act = malloc(256+16);
 
-	reads = writes = waits = 0;
+	syncs = reads = writes = waits = 0;
 	while ((p = fgets(str, 4096, td->io_log_rfile)) != NULL) {
 		struct io_piece *ipo;
 		int r;
@@ -492,7 +493,9 @@ static bool read_iolog2(struct thread_data *td)
 				continue;
 			waits++;
 		} else if (rw == DDIR_INVAL) {
-		} else if (!ddir_sync(rw)) {
+		} else if (ddir_sync(rw)) {
+			syncs++;
+		} else {
 			log_err("bad ddir: %d\n", rw);
 			continue;
 		}
@@ -547,6 +550,8 @@ static bool read_iolog2(struct thread_data *td)
 			" read-only\n", td->o.name, writes);
 		writes = 0;
 	}
+	if (syncs)
+		td->flags |= TD_F_SYNCS;
 
 	if (td->o.read_iolog_chunked) {
 		if (td->io_log_current == 0) {

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-25 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-25 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit c377f4f85943e5b155b3daaab1ce5213077531d8:

  io_uring: use syscall helpers for the hot path (2022-02-21 09:43:48 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to cf2511565f40be1b78b3fc1194e823baf305f0a0:

  Merge branch 'master' of https://github.com/bvanassche/fio (2022-02-24 12:40:19 -0700)

----------------------------------------------------------------
Bart Van Assche (1):
      Fix three compiler warnings

Jens Axboe (1):
      Merge branch 'master' of https://github.com/bvanassche/fio

 engines/cmdprio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/engines/cmdprio.c b/engines/cmdprio.c
index dd358754..979a81b6 100644
--- a/engines/cmdprio.c
+++ b/engines/cmdprio.c
@@ -319,7 +319,7 @@ static int fio_cmdprio_gen_perc(struct thread_data *td, struct cmdprio *cmdprio)
 {
 	struct cmdprio_options *options = cmdprio->options;
 	struct cmdprio_prio *prio;
-	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {};
 	struct thread_stat *ts = &td->ts;
 	enum fio_ddir ddir;
 	int ret;
@@ -368,8 +368,8 @@ static int fio_cmdprio_parse_and_gen_bssplit(struct thread_data *td,
 					     struct cmdprio *cmdprio)
 {
 	struct cmdprio_options *options = cmdprio->options;
-	struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {0};
-	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+	struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {};
+	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {};
 	struct thread_stat *ts = &td->ts;
 	int ret, implicit_cmdprio;
 	enum fio_ddir ddir;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-22 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-22 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 3f43022d4021850905886e391ec68c02c99aec5a:

  Merge branch 'genfio-tempfile' of https://github.com/scop/fio (2022-02-20 12:39:11 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c377f4f85943e5b155b3daaab1ce5213077531d8:

  io_uring: use syscall helpers for the hot path (2022-02-21 09:43:48 -0700)

----------------------------------------------------------------
Jens Axboe (3):
      aarch64: add system call definitions
      x86-64: add system call definitions
      io_uring: use syscall helpers for the hot path

 arch/arch-aarch64.h |  77 +++++++++++++++++++++++++++++++++++
 arch/arch-x86_64.h  | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 engines/io_uring.c  |   5 +++
 t/io_uring.c        |   5 +++
 4 files changed, 200 insertions(+)

---

Diff of recent changes:

diff --git a/arch/arch-aarch64.h b/arch/arch-aarch64.h
index 94571709..951d1718 100644
--- a/arch/arch-aarch64.h
+++ b/arch/arch-aarch64.h
@@ -44,4 +44,81 @@ static inline int arch_init(char *envp[])
 	return 0;
 }
 
+#define __do_syscallN(...) ({						\
+	__asm__ volatile (						\
+		"svc 0"							\
+		: "=r"(x0)						\
+		: __VA_ARGS__						\
+		: "memory", "cc");					\
+	(long) x0;							\
+})
+
+#define __do_syscall0(__n) ({						\
+	register long x8 __asm__("x8") = __n;				\
+	register long x0 __asm__("x0");					\
+									\
+	__do_syscallN("r" (x8));					\
+})
+
+#define __do_syscall1(__n, __a) ({					\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0));				\
+})
+
+#define __do_syscall2(__n, __a, __b) ({					\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+	register __typeof__(__b) x1 __asm__("x1") = __b;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0), "r" (x1));			\
+})
+
+#define __do_syscall3(__n, __a, __b, __c) ({				\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+	register __typeof__(__b) x1 __asm__("x1") = __b;		\
+	register __typeof__(__c) x2 __asm__("x2") = __c;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2));		\
+})
+
+#define __do_syscall4(__n, __a, __b, __c, __d) ({			\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+	register __typeof__(__b) x1 __asm__("x1") = __b;		\
+	register __typeof__(__c) x2 __asm__("x2") = __c;		\
+	register __typeof__(__d) x3 __asm__("x3") = __d;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3));\
+})
+
+#define __do_syscall5(__n, __a, __b, __c, __d, __e) ({			\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+	register __typeof__(__b) x1 __asm__("x1") = __b;		\
+	register __typeof__(__c) x2 __asm__("x2") = __c;		\
+	register __typeof__(__d) x3 __asm__("x3") = __d;		\
+	register __typeof__(__e) x4 __asm__("x4") = __e;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3),	\
+			"r"(x4));					\
+})
+
+#define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({		\
+	register long x8 __asm__("x8") = __n;				\
+	register __typeof__(__a) x0 __asm__("x0") = __a;		\
+	register __typeof__(__b) x1 __asm__("x1") = __b;		\
+	register __typeof__(__c) x2 __asm__("x2") = __c;		\
+	register __typeof__(__d) x3 __asm__("x3") = __d;		\
+	register __typeof__(__e) x4 __asm__("x4") = __e;		\
+	register __typeof__(__f) x5 __asm__("x5") = __f;		\
+									\
+	__do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3),	\
+			"r" (x4), "r"(x5));				\
+})
+
+#define FIO_ARCH_HAS_SYSCALL
+
 #endif
diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h
index 25850f90..86ce1b7e 100644
--- a/arch/arch-x86_64.h
+++ b/arch/arch-x86_64.h
@@ -68,4 +68,117 @@ static inline int arch_rand_seed(unsigned long *seed)
 	return 0;
 }
 
+#define __do_syscall0(NUM) ({			\
+	intptr_t rax;				\
+						\
+	__asm__ volatile(			\
+		"syscall"			\
+		: "=a"(rax)	/* %rax */	\
+		: "a"(NUM)	/* %rax */	\
+		: "rcx", "r11", "memory"	\
+	);					\
+	rax;					\
+})
+
+#define __do_syscall1(NUM, ARG1) ({		\
+	intptr_t rax;				\
+						\
+	__asm__ volatile(			\
+		"syscall"			\
+		: "=a"(rax)	/* %rax */	\
+		: "a"((NUM)),	/* %rax */	\
+		  "D"((ARG1))	/* %rdi */	\
+		: "rcx", "r11", "memory"	\
+	);					\
+	rax;					\
+})
+
+#define __do_syscall2(NUM, ARG1, ARG2) ({	\
+	intptr_t rax;				\
+						\
+	__asm__ volatile(			\
+		"syscall"			\
+		: "=a"(rax)	/* %rax */	\
+		: "a"((NUM)),	/* %rax */	\
+		  "D"((ARG1)),	/* %rdi */	\
+		  "S"((ARG2))	/* %rsi */	\
+		: "rcx", "r11", "memory"	\
+	);					\
+	rax;					\
+})
+
+#define __do_syscall3(NUM, ARG1, ARG2, ARG3) ({	\
+	intptr_t rax;				\
+						\
+	__asm__ volatile(			\
+		"syscall"			\
+		: "=a"(rax)	/* %rax */	\
+		: "a"((NUM)),	/* %rax */	\
+		  "D"((ARG1)),	/* %rdi */	\
+		  "S"((ARG2)),	/* %rsi */	\
+		  "d"((ARG3))	/* %rdx */	\
+		: "rcx", "r11", "memory"	\
+	);					\
+	rax;					\
+})
+
+#define __do_syscall4(NUM, ARG1, ARG2, ARG3, ARG4) ({			\
+	intptr_t rax;							\
+	register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4);	\
+									\
+	__asm__ volatile(						\
+		"syscall"						\
+		: "=a"(rax)	/* %rax */				\
+		: "a"((NUM)),	/* %rax */				\
+		  "D"((ARG1)),	/* %rdi */				\
+		  "S"((ARG2)),	/* %rsi */				\
+		  "d"((ARG3)),	/* %rdx */				\
+		  "r"(__r10)	/* %r10 */				\
+		: "rcx", "r11", "memory"				\
+	);								\
+	rax;								\
+})
+
+#define __do_syscall5(NUM, ARG1, ARG2, ARG3, ARG4, ARG5) ({		\
+	intptr_t rax;							\
+	register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4);	\
+	register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5);		\
+									\
+	__asm__ volatile(						\
+		"syscall"						\
+		: "=a"(rax)	/* %rax */				\
+		: "a"((NUM)),	/* %rax */				\
+		  "D"((ARG1)),	/* %rdi */				\
+		  "S"((ARG2)),	/* %rsi */				\
+		  "d"((ARG3)),	/* %rdx */				\
+		  "r"(__r10),	/* %r10 */				\
+		  "r"(__r8)	/* %r8 */				\
+		: "rcx", "r11", "memory"				\
+	);								\
+	rax;								\
+})
+
+#define __do_syscall6(NUM, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) ({	\
+	intptr_t rax;							\
+	register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4);	\
+	register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5);		\
+	register __typeof__(ARG6) __r9 __asm__("r9") = (ARG6);		\
+									\
+	__asm__ volatile(						\
+		"syscall"						\
+		: "=a"(rax)	/* %rax */				\
+		: "a"((NUM)),	/* %rax */				\
+		  "D"((ARG1)),	/* %rdi */				\
+		  "S"((ARG2)),	/* %rsi */				\
+		  "d"((ARG3)),	/* %rdx */				\
+		  "r"(__r10),	/* %r10 */				\
+		  "r"(__r8),	/* %r8 */				\
+		  "r"(__r9)	/* %r9 */				\
+		: "rcx", "r11", "memory"				\
+	);								\
+	rax;								\
+})
+
+#define FIO_ARCH_HAS_SYSCALL
+
 #endif
diff --git a/engines/io_uring.c b/engines/io_uring.c
index a2533c88..1e15647e 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -278,8 +278,13 @@ static struct fio_option options[] = {
 static int io_uring_enter(struct ioring_data *ld, unsigned int to_submit,
 			 unsigned int min_complete, unsigned int flags)
 {
+#ifdef FIO_ARCH_HAS_SYSCALL
+	return __do_syscall6(__NR_io_uring_enter, ld->ring_fd, to_submit,
+				min_complete, flags, NULL, 0);
+#else
 	return syscall(__NR_io_uring_enter, ld->ring_fd, to_submit,
 			min_complete, flags, NULL, 0);
+#endif
 }
 
 static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
diff --git a/t/io_uring.c b/t/io_uring.c
index f513d7dc..b8fcffe8 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -422,8 +422,13 @@ out:
 static int io_uring_enter(struct submitter *s, unsigned int to_submit,
 			  unsigned int min_complete, unsigned int flags)
 {
+#ifdef FIO_ARCH_HAS_SYSCALL
+	return __do_syscall6(__NR_io_uring_enter, s->ring_fd, to_submit,
+				min_complete, flags, NULL, 0);
+#else
 	return syscall(__NR_io_uring_enter, s->ring_fd, to_submit, min_complete,
 			flags, NULL, 0);
+#endif
 }
 
 #ifndef CONFIG_HAVE_GETTID

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-21 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-21 13:00 UTC (permalink / raw)
  To: fio

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 29560 bytes --]

The following changes since commit 933651ec130ce4d27a5c249d649d20afeb2bdf38:

  Merge branch 'rpma-update-RPMA-engines-with-new-librpma-completions-API' of https://github.com/ldorau/fio (2022-02-18 09:02:03 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 3f43022d4021850905886e391ec68c02c99aec5a:

  Merge branch 'genfio-tempfile' of https://github.com/scop/fio (2022-02-20 12:39:11 -0700)

----------------------------------------------------------------
Jens Axboe (3):
      Merge branch 'which-command-v-type-P' of https://github.com/scop/fio
      Merge branch 'spelling' of https://github.com/scop/fio
      Merge branch 'genfio-tempfile' of https://github.com/scop/fio

Ville Skyttä (3):
      genfio: fix temporary file handling
      ci, t, tools: use `command` and `type` instead of `which`
      Spelling and grammar fixes

 HOWTO.rst                           | 4 ++--
 ci/travis-install-pmdk.sh           | 2 +-
 crc/xxhash.c                        | 4 ++--
 engines/exec.c                      | 4 ++--
 engines/http.c                      | 4 ++--
 engines/ime.c                       | 2 +-
 engines/libhdfs.c                   | 2 +-
 engines/librpma_fio.c               | 2 +-
 engines/librpma_gpspm.c             | 2 +-
 engines/nbd.c                       | 2 +-
 engines/rados.c                     | 2 +-
 engines/rbd.c                       | 4 ++--
 engines/rdma.c                      | 2 +-
 examples/enospc-pressure.fio        | 4 ++--
 examples/falloc.fio                 | 2 +-
 examples/librpma_apm-server.fio     | 2 +-
 examples/librpma_gpspm-server.fio   | 2 +-
 examples/rand-zones.fio             | 2 +-
 filesetup.c                         | 2 +-
 fio.1                               | 4 ++--
 graph.c                             | 2 +-
 lib/pattern.c                       | 6 +++---
 options.c                           | 4 ++--
 os/os-android.h                     | 2 +-
 os/os-netbsd.h                      | 2 +-
 os/windows/posix.c                  | 2 +-
 oslib/libmtd.h                      | 6 +++---
 stat.c                              | 2 +-
 stat.h                              | 2 +-
 t/latency_percentiles.py            | 2 +-
 t/one-core-peak.sh                  | 6 +++---
 t/readonly.py                       | 2 +-
 t/sgunmap-test.py                   | 2 +-
 t/steadystate_tests.py              | 2 +-
 t/time-test.c                       | 2 +-
 tools/fio_generate_plots            | 2 +-
 tools/fio_jsonplus_clat2csv         | 4 ++--
 tools/fiograph/fiograph.py          | 2 +-
 tools/genfio                        | 5 +++--
 tools/hist/fio-histo-log-pctiles.py | 2 +-
 tools/plot/fio2gnuplot              | 4 ++--
 tools/plot/fio2gnuplot.1            | 2 +-
 tools/plot/fio2gnuplot.manpage      | 2 +-
 43 files changed, 61 insertions(+), 60 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO.rst b/HOWTO.rst
index ac1f3478..0978879c 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -1443,7 +1443,7 @@ I/O type
 	range of possible random values.
 	Defaults are: random for **pareto** and **zipf**, and 0.5 for **normal**.
 	If you wanted to use **zipf** with a `theta` of 1.2 centered on 1/4 of allowed value range,
-	you would use ``random_distibution=zipf:1.2:0.25``.
+	you would use ``random_distribution=zipf:1.2:0.25``.
 
 	For a **zoned** distribution, fio supports specifying percentages of I/O
 	access that should fall within what range of the file or device. For
@@ -3370,7 +3370,7 @@ Verification
 	To avoid false verification errors, do not use the norandommap option when
 	verifying data with async I/O engines and I/O depths > 1.  Or use the
 	norandommap and the lfsr random generator together to avoid writing to the
-	same offset with muliple outstanding I/Os.
+	same offset with multiple outstanding I/Os.
 
 .. option:: verify_offset=int
 
diff --git a/ci/travis-install-pmdk.sh b/ci/travis-install-pmdk.sh
index 803438f8..3b0b5bbc 100755
--- a/ci/travis-install-pmdk.sh
+++ b/ci/travis-install-pmdk.sh
@@ -12,7 +12,7 @@ WORKDIR=$(pwd)
 #    /bin/sh: 1: clang: not found
 # if CC is not set to the full path of clang.
 #
-export CC=$(which $CC)
+export CC=$(type -P $CC)
 
 # Install PMDK libraries, because PMDK's libpmem
 # is a dependency of the librpma fio engine.
diff --git a/crc/xxhash.c b/crc/xxhash.c
index 4736c528..0119564b 100644
--- a/crc/xxhash.c
+++ b/crc/xxhash.c
@@ -50,10 +50,10 @@ You can contact the author at :
 //#define XXH_ACCEPT_NULL_INPUT_POINTER 1
 
 // XXH_FORCE_NATIVE_FORMAT :
-// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+// By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
 // Results are therefore identical for little-endian and big-endian CPU.
 // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
-// Should endian-independance be of no importance for your application, you may set the #define below to 1.
+// Should endian-independence be of no importance for your application, you may set the #define below to 1.
 // It will improve speed for Big-endian CPU.
 // This option has no impact on Little_Endian CPU.
 #define XXH_FORCE_NATIVE_FORMAT 0
diff --git a/engines/exec.c b/engines/exec.c
index ab3639c5..20e50e00 100644
--- a/engines/exec.c
+++ b/engines/exec.c
@@ -67,8 +67,8 @@ char *str_replace(char *orig, const char *rep, const char *with)
 	/*
 	 * Replace a substring by another.
 	 *
-	 * Returns the new string if occurences were found
-	 * Returns orig if no occurence is found
+	 * Returns the new string if occurrences were found
+	 * Returns orig if no occurrence is found
 	 */
 	char *result, *insert, *tmp;
 	int len_rep, len_with, len_front, count;
diff --git a/engines/http.c b/engines/http.c
index 35c44871..57d4967d 100644
--- a/engines/http.c
+++ b/engines/http.c
@@ -388,7 +388,7 @@ static void _add_aws_auth_header(CURL *curl, struct curl_slist *slist, struct ht
 
 	signature = _conv_hex(md, SHA256_DIGEST_LENGTH);
 
-	/* Surpress automatic Accept: header */
+	/* Suppress automatic Accept: header */
 	slist = curl_slist_append(slist, "Accept:");
 
 	snprintf(s, sizeof(s), "x-amz-content-sha256: %s", dsha);
@@ -419,7 +419,7 @@ static void _add_swift_header(CURL *curl, struct curl_slist *slist, struct http_
 	if (op == DDIR_WRITE) {
 		dsha = _gen_hex_md5(buf, len);
 	}
-	/* Surpress automatic Accept: header */
+	/* Suppress automatic Accept: header */
 	slist = curl_slist_append(slist, "Accept:");
 
 	snprintf(s, sizeof(s), "etag: %s", dsha);
diff --git a/engines/ime.c b/engines/ime.c
index 440cc29e..f6690cc1 100644
--- a/engines/ime.c
+++ b/engines/ime.c
@@ -83,7 +83,7 @@ struct ime_data {
 	};
 	struct iovec 	*iovecs;		/* array of queued iovecs */
 	struct io_u 	**io_us;		/* array of queued io_u pointers */
-	struct io_u 	**event_io_us;	/* array of the events retieved afer get_events*/
+	struct io_u 	**event_io_us;	/* array of the events retrieved after get_events*/
 	unsigned int 	queued;			/* iovecs/io_us in the queue */
 	unsigned int 	events;			/* number of committed iovecs/io_us */
 
diff --git a/engines/libhdfs.c b/engines/libhdfs.c
index eb55c3c5..f20e45ca 100644
--- a/engines/libhdfs.c
+++ b/engines/libhdfs.c
@@ -27,7 +27,7 @@ struct hdfsio_data {
 };
 
 struct hdfsio_options {
-	void *pad;			/* needed because offset can't be 0 for a option defined used offsetof */
+	void *pad;			/* needed because offset can't be 0 for an option defined using offsetof */
 	char *host;
 	char *directory;
 	unsigned int port;
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index dfd82180..34818904 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -426,7 +426,7 @@ int librpma_fio_client_post_init(struct thread_data *td)
 
 	/*
 	 * td->orig_buffer is not aligned. The engine requires aligned io_us
-	 * so FIO alignes up the address using the formula below.
+	 * so FIO aligns up the address using the formula below.
 	 */
 	ccd->orig_buffer_aligned = PTR_ALIGN(td->orig_buffer, page_mask) +
 			td->o.mem_align;
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index 14626e7f..5cf97472 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -431,7 +431,7 @@ static int server_post_init(struct thread_data *td)
 
 	/*
 	 * td->orig_buffer is not aligned. The engine requires aligned io_us
-	 * so FIO alignes up the address using the formula below.
+	 * so FIO aligns up the address using the formula below.
 	 */
 	sd->orig_buffer_aligned = PTR_ALIGN(td->orig_buffer, page_mask) +
 			td->o.mem_align;
diff --git a/engines/nbd.c b/engines/nbd.c
index b0ba75e6..7c2d5f4b 100644
--- a/engines/nbd.c
+++ b/engines/nbd.c
@@ -52,7 +52,7 @@ static struct fio_option options[] = {
 	},
 };
 
-/* Alocates nbd_data. */
+/* Allocates nbd_data. */
 static int nbd_setup(struct thread_data *td)
 {
 	struct nbd_data *nbd_data;
diff --git a/engines/rados.c b/engines/rados.c
index 23e62c4c..976f9229 100644
--- a/engines/rados.c
+++ b/engines/rados.c
@@ -151,7 +151,7 @@ static int _fio_rados_connect(struct thread_data *td)
 		char *client_name = NULL;
 
 		/*
-		* If we specify cluser name, the rados_create2
+		* If we specify cluster name, the rados_create2
 		* will not assume 'client.'. name is considered
 		* as a full type.id namestr
 		*/
diff --git a/engines/rbd.c b/engines/rbd.c
index c6203d4c..2f25889a 100644
--- a/engines/rbd.c
+++ b/engines/rbd.c
@@ -173,7 +173,7 @@ static int _fio_rbd_connect(struct thread_data *td)
 		char *client_name = NULL; 
 
 		/*
-		 * If we specify cluser name, the rados_create2
+		 * If we specify cluster name, the rados_create2
 		 * will not assume 'client.'. name is considered
 		 * as a full type.id namestr
 		 */
@@ -633,7 +633,7 @@ static int fio_rbd_setup(struct thread_data *td)
 
 	/* taken from "net" engine. Pretend we deal with files,
 	 * even if we do not have any ideas about files.
-	 * The size of the RBD is set instead of a artificial file.
+	 * The size of the RBD is set instead of an artificial file.
 	 */
 	if (!td->files_index) {
 		add_file(td, td->o.filename ? : "rbd", 0, 0);
diff --git a/engines/rdma.c b/engines/rdma.c
index f4471869..4eb86652 100644
--- a/engines/rdma.c
+++ b/engines/rdma.c
@@ -1194,7 +1194,7 @@ static int check_set_rlimits(struct thread_data *td)
 
 static int compat_options(struct thread_data *td)
 {
-	// The original RDMA engine had an ugly / seperator
+	// The original RDMA engine had an ugly / separator
 	// on the filename for it's options. This function
 	// retains backwards compatibility with it. Note we do not
 	// support setting the bindname option is this legacy mode.
diff --git a/examples/enospc-pressure.fio b/examples/enospc-pressure.fio
index ca9d8f7a..fa404fd5 100644
--- a/examples/enospc-pressure.fio
+++ b/examples/enospc-pressure.fio
@@ -35,8 +35,8 @@ bs=4k
 rw=randtrim
 filename=raicer
 
-# Verifier thread continiously write to newly allcated blocks
-# and veryfy written content
+# Verifier thread continuously writes to newly allocated blocks
+# and verifies written content
 [aio-dio-verifier]
 create_on_open=1
 verify=crc32c-intel
diff --git a/examples/falloc.fio b/examples/falloc.fio
index fadf1321..5a3e88b8 100644
--- a/examples/falloc.fio
+++ b/examples/falloc.fio
@@ -29,7 +29,7 @@ rw=randtrim
 numjobs=2
 filename=fragmented_file
 
-## Mesure IO performance on fragmented file
+## Measure IO performance on fragmented file
 [sequential aio-dio write]
 stonewall
 ioengine=libaio
diff --git a/examples/librpma_apm-server.fio b/examples/librpma_apm-server.fio
index 062b5215..dc1ddba2 100644
--- a/examples/librpma_apm-server.fio
+++ b/examples/librpma_apm-server.fio
@@ -20,7 +20,7 @@ thread
 # (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)
 direct_write_to_pmem=0
 
-numjobs=1 # number of expected incomming connections
+numjobs=1 # number of expected incoming connections
 size=100MiB # size of workspace for a single connection
 filename=malloc # device dax or an existing fsdax file or "malloc" for allocation from DRAM
 # filename=/dev/dax1.0
diff --git a/examples/librpma_gpspm-server.fio b/examples/librpma_gpspm-server.fio
index 67e92a28..4555314f 100644
--- a/examples/librpma_gpspm-server.fio
+++ b/examples/librpma_gpspm-server.fio
@@ -22,7 +22,7 @@ thread
 direct_write_to_pmem=0
 # set to 0 (false) to wait for completion instead of busy-wait polling completion.
 busy_wait_polling=1
-numjobs=1 # number of expected incomming connections
+numjobs=1 # number of expected incoming connections
 iodepth=2 # number of parallel GPSPM requests
 size=100MiB # size of workspace for a single connection
 filename=malloc # device dax or an existing fsdax file or "malloc" for allocation from DRAM
diff --git a/examples/rand-zones.fio b/examples/rand-zones.fio
index 169137d4..10e71727 100644
--- a/examples/rand-zones.fio
+++ b/examples/rand-zones.fio
@@ -21,6 +21,6 @@ random_distribution=zoned:50/5:30/15:20/
 # The above applies to all of reads/writes/trims. If we wanted to do
 # something differently for writes, let's say 50% for the first 10%
 # and 50% for the remaining 90%, we could do it by adding a new section
-# after a a comma.
+# after a comma.
 
 # random_distribution=zoned:50/5:30/15:20/,50/10:50/90
diff --git a/filesetup.c b/filesetup.c
index fb556d84..7c32d0af 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -1486,7 +1486,7 @@ static bool init_rand_distribution(struct thread_data *td)
 
 /*
  * Check if the number of blocks exceeds the randomness capability of
- * the selected generator. Tausworthe is 32-bit, the others are fullly
+ * the selected generator. Tausworthe is 32-bit, the others are fully
  * 64-bit capable.
  */
 static int check_rand_gen_limits(struct thread_data *td, struct fio_file *f,
diff --git a/fio.1 b/fio.1
index e23d4092..98410655 100644
--- a/fio.1
+++ b/fio.1
@@ -1221,7 +1221,7 @@ more control over most probable outcome. This value is in range [0-1] which maps
 range of possible random values.
 Defaults are: random for \fBpareto\fR and \fBzipf\fR, and 0.5 for \fBnormal\fR.
 If you wanted to use \fBzipf\fR with a `theta` of 1.2 centered on 1/4 of allowed value range,
-you would use `random_distibution=zipf:1.2:0.25`.
+you would use `random_distribution=zipf:1.2:0.25`.
 .P
 For a \fBzoned\fR distribution, fio supports specifying percentages of I/O
 access that should fall within what range of the file or device. For
@@ -3082,7 +3082,7 @@ the verify will be of the newly written data.
 To avoid false verification errors, do not use the norandommap option when
 verifying data with async I/O engines and I/O depths > 1.  Or use the
 norandommap and the lfsr random generator together to avoid writing to the
-same offset with muliple outstanding I/Os.
+same offset with multiple outstanding I/Os.
 .RE
 .TP
 .BI verify_offset \fR=\fPint
diff --git a/graph.c b/graph.c
index 7a174170..c49cdae1 100644
--- a/graph.c
+++ b/graph.c
@@ -999,7 +999,7 @@ const char *graph_find_tooltip(struct graph *g, int ix, int iy)
 				ydiff = fabs(yval - y);
 
 				/*
-				 * zero delta, or within or match critera, break
+				 * zero delta, or within or match criteria, break
 				 */
 				if (ydiff < best_delta) {
 					best_delta = ydiff;
diff --git a/lib/pattern.c b/lib/pattern.c
index 680a12be..d8203630 100644
--- a/lib/pattern.c
+++ b/lib/pattern.c
@@ -211,7 +211,7 @@ static const char *parse_number(const char *beg, char *out,
  * This function tries to find formats, e.g.:
  *   %o - offset of the block
  *
- * In case of successfull parsing it fills the format param
+ * In case of successful parsing it fills the format param
  * with proper offset and the size of the expected value, which
  * should be pasted into buffer using the format 'func' callback.
  *
@@ -267,7 +267,7 @@ static const char *parse_format(const char *in, char *out, unsigned int parsed,
  * @fmt_desc - array of pattern format descriptors [input]
  * @fmt - array of pattern formats [output]
  * @fmt_sz - pointer where the size of pattern formats array stored [input],
- *           after successfull parsing this pointer will contain the number
+ *           after successful parsing this pointer will contain the number
  *           of parsed formats if any [output].
  *
  * strings:
@@ -275,7 +275,7 @@ static const char *parse_format(const char *in, char *out, unsigned int parsed,
  *   NOTE: there is no way to escape quote, so "123\"abc" does not work.
  *
  * numbers:
- *   hexidecimal - sequence of hex bytes starting from 0x or 0X prefix,
+ *   hexadecimal - sequence of hex bytes starting from 0x or 0X prefix,
  *                 e.g. 0xff12ceff1100ff
  *   decimal     - decimal number in range [INT_MIN, INT_MAX]
  *
diff --git a/options.c b/options.c
index 6cdbd268..e06d9b66 100644
--- a/options.c
+++ b/options.c
@@ -1366,7 +1366,7 @@ int get_max_str_idx(char *input)
 }
 
 /*
- * Returns the directory at the index, indexes > entires will be
+ * Returns the directory at the index, indexes > entries will be
  * assigned via modulo division of the index
  */
 int set_name_idx(char *target, size_t tlen, char *input, int index,
@@ -1560,7 +1560,7 @@ static int str_gtod_reduce_cb(void *data, int *il)
 	int val = *il;
 
 	/*
-	 * Only modfiy options if gtod_reduce==1
+	 * Only modify options if gtod_reduce==1
 	 * Otherwise leave settings alone.
 	 */
 	if (val) {
diff --git a/os/os-android.h b/os/os-android.h
index 10c51b83..2f73d249 100644
--- a/os/os-android.h
+++ b/os/os-android.h
@@ -66,7 +66,7 @@
 
 #ifndef CONFIG_NO_SHM
 /*
- * Bionic doesn't support SysV shared memeory, so implement it using ashmem
+ * Bionic doesn't support SysV shared memory, so implement it using ashmem
  */
 #include <stdio.h>
 #include <linux/ashmem.h>
diff --git a/os/os-netbsd.h b/os/os-netbsd.h
index 624c7fa5..b553a430 100644
--- a/os/os-netbsd.h
+++ b/os/os-netbsd.h
@@ -13,7 +13,7 @@
 #include <sys/endian.h>
 #include <sys/sysctl.h>
 
-/* XXX hack to avoid confilcts between rbtree.h and <sys/rbtree.h> */
+/* XXX hack to avoid conflicts between rbtree.h and <sys/rbtree.h> */
 #undef rb_node
 #undef rb_left
 #undef rb_right
diff --git a/os/windows/posix.c b/os/windows/posix.c
index 0d415e1e..a3a6c89f 100644
--- a/os/windows/posix.c
+++ b/os/windows/posix.c
@@ -1165,7 +1165,7 @@ HANDLE windows_handle_connection(HANDLE hjob, int sk)
 		ret = pi.hProcess;
 
 	/* duplicate socket and write the protocol_info to pipe so child can
-	 * duplicate the communciation socket */
+	 * duplicate the communication socket */
 	if (WSADuplicateSocket(sk, GetProcessId(pi.hProcess), &protocol_info)) {
 		log_err("WSADuplicateSocket failed (%lu).\n", GetLastError());
 		ret = INVALID_HANDLE_VALUE;
diff --git a/oslib/libmtd.h b/oslib/libmtd.h
index a0c90dcb..668e7798 100644
--- a/oslib/libmtd.h
+++ b/oslib/libmtd.h
@@ -256,7 +256,7 @@ int mtd_mark_bad(const struct mtd_dev_info *mtd, int fd, int eb);
  * @mtd: MTD device description object
  * @fd: MTD device node file descriptor
  * @eb: eraseblock to read from
- * @offs: offset withing the eraseblock to read from
+ * @offs: offset within the eraseblock to read from
  * @buf: buffer to read data to
  * @len: how many bytes to read
  *
@@ -273,7 +273,7 @@ int mtd_read(const struct mtd_dev_info *mtd, int fd, int eb, int offs,
  * @mtd: MTD device description object
  * @fd: MTD device node file descriptor
  * @eb: eraseblock to write to
- * @offs: offset withing the eraseblock to write to
+ * @offs: offset within the eraseblock to write to
  * @data: data buffer to write
  * @len: how many data bytes to write
  * @oob: OOB buffer to write
@@ -329,7 +329,7 @@ int mtd_write_oob(libmtd_t desc, const struct mtd_dev_info *mtd, int fd,
  * @mtd: MTD device description object
  * @fd: MTD device node file descriptor
  * @eb: eraseblock to write to
- * @offs: offset withing the eraseblock to write to
+ * @offs: offset within the eraseblock to write to
  * @img_name: the file to write
  *
  * This function writes an image @img_name the MTD device defined by @mtd. @eb
diff --git a/stat.c b/stat.c
index 1764eebc..7947edb4 100644
--- a/stat.c
+++ b/stat.c
@@ -377,7 +377,7 @@ void show_group_stats(struct group_run_stats *rs, struct buf_output *out)
 		free(maxalt);
 	}
 
-	/* Need to aggregate statisitics to show mixed values */
+	/* Need to aggregate statistics to show mixed values */
 	if (rs->unified_rw_rep == UNIFIED_BOTH)
 		show_mixed_group_stats(rs, out);
 }
diff --git a/stat.h b/stat.h
index dce0bb0d..eb7845af 100644
--- a/stat.h
+++ b/stat.h
@@ -68,7 +68,7 @@ struct group_run_stats {
  * than one. This method has low accuracy when the value is small. For
  * example, let the buckets be {[0,99],[100,199],...,[900,999]}, and
  * the represented value of each bucket be the mean of the range. Then
- * a value 0 has an round-off error of 49.5. To improve on this, we
+ * a value 0 has a round-off error of 49.5. To improve on this, we
  * use buckets with non-uniform ranges, while bounding the error of
  * each bucket within a ratio of the sample value. A simple example
  * would be when error_bound = 0.005, buckets are {
diff --git a/t/latency_percentiles.py b/t/latency_percentiles.py
index 9e37d9fe..81704700 100755
--- a/t/latency_percentiles.py
+++ b/t/latency_percentiles.py
@@ -270,7 +270,7 @@ class FioLatTest():
             #
             # Check only for the presence/absence of json+
             # latency bins. Future work can check the
-            # accurracy of the bin values and counts.
+            # accuracy of the bin values and counts.
             #
             # Because the latency percentiles are based on
             # the bins, we can be confident that the bin
diff --git a/t/one-core-peak.sh b/t/one-core-peak.sh
index 9da8304e..3ac119f6 100755
--- a/t/one-core-peak.sh
+++ b/t/one-core-peak.sh
@@ -33,8 +33,8 @@ check_binary() {
   # Ensure the binaries are present and executable
   for bin in "$@"; do
     if [ ! -x ${bin} ]; then
-      which ${bin} >/dev/null
-      [ $? -eq 0 ] || fatal "${bin} doesn't exists or is not executable"
+      command -v ${bin} >/dev/null
+      [ $? -eq 0 ] || fatal "${bin} doesn't exist or is not executable"
     fi
   done
 }
@@ -197,7 +197,7 @@ show_nvme() {
   fw=$(cat ${device_dir}/firmware_rev | xargs) #xargs for trimming spaces
   serial=$(cat ${device_dir}/serial | xargs) #xargs for trimming spaces
   info ${device_name} "MODEL=${model} FW=${fw} serial=${serial} PCI=${pci_addr}@${link_speed} IRQ=${irq} NUMA=${numa} CPUS=${cpus} "
-  which nvme &> /dev/null
+  command -v nvme > /dev/null
   if [ $? -eq 0 ]; then
     status=""
     NCQA=$(nvme get-feature -H -f 0x7 ${device} 2>&1 |grep NCQA |cut -d ':' -f 2 | xargs)
diff --git a/t/readonly.py b/t/readonly.py
index 464847c6..80fac639 100755
--- a/t/readonly.py
+++ b/t/readonly.py
@@ -6,7 +6,7 @@
 #
 # readonly.py
 #
-# Do some basic tests of the --readonly paramter
+# Do some basic tests of the --readonly parameter
 #
 # USAGE
 # python readonly.py [-f fio-executable]
diff --git a/t/sgunmap-test.py b/t/sgunmap-test.py
index 4960a040..6687494f 100755
--- a/t/sgunmap-test.py
+++ b/t/sgunmap-test.py
@@ -3,7 +3,7 @@
 #
 # sgunmap-test.py
 #
-# Limited functonality test for trim workloads using fio's sg ioengine
+# Limited functionality test for trim workloads using fio's sg ioengine
 # This checks only the three sets of reported iodepths
 #
 # !!!WARNING!!!
diff --git a/t/steadystate_tests.py b/t/steadystate_tests.py
index e8bd768c..d6ffd177 100755
--- a/t/steadystate_tests.py
+++ b/t/steadystate_tests.py
@@ -2,7 +2,7 @@
 #
 # steadystate_tests.py
 #
-# Test option parsing and functonality for fio's steady state detection feature.
+# Test option parsing and functionality for fio's steady state detection feature.
 #
 # steadystate_tests.py --read file-for-read-testing --write file-for-write-testing ./fio
 #
diff --git a/t/time-test.c b/t/time-test.c
index a74d9206..3c87d4d4 100644
--- a/t/time-test.c
+++ b/t/time-test.c
@@ -67,7 +67,7 @@
  *	accuracy because the (ticks * clock_mult) product used for final
  *	fractional chunk
  *
- *  iv) 64-bit arithmetic with the clock ticks to nsec conversion occuring in
+ *  iv) 64-bit arithmetic with the clock ticks to nsec conversion occurring in
  *	two stages. This is carried out using locks to update the number of
  *	large time chunks (MAX_CLOCK_SEC_2STAGE) that have elapsed.
  *
diff --git a/tools/fio_generate_plots b/tools/fio_generate_plots
index e4558788..468cf27a 100755
--- a/tools/fio_generate_plots
+++ b/tools/fio_generate_plots
@@ -21,7 +21,7 @@ if [ -z "$1" ]; then
 	exit 1
 fi
 
-GNUPLOT=$(which gnuplot)
+GNUPLOT=$(command -v gnuplot)
 if [ ! -x "$GNUPLOT" ]
 then
 	echo You need gnuplot installed to generate graphs
diff --git a/tools/fio_jsonplus_clat2csv b/tools/fio_jsonplus_clat2csv
index 7f310fcc..8fdd014d 100755
--- a/tools/fio_jsonplus_clat2csv
+++ b/tools/fio_jsonplus_clat2csv
@@ -135,7 +135,7 @@ def more_bins(indices, bins):
 
     Returns:
         True if the indices do not yet point to the end of each bin in bins.
-        False if the indices point beyond their repsective bins.
+        False if the indices point beyond their respective bins.
     """
 
     for key, value in six.iteritems(indices):
@@ -160,7 +160,7 @@ def debug_print(debug, *args):
 def get_csvfile(dest, jobnum):
     """Generate CSV filename from command-line arguments and job numbers.
 
-    Paramaters:
+    Parameters:
         dest        file specification for CSV filename.
         jobnum      job number.
 
diff --git a/tools/fiograph/fiograph.py b/tools/fiograph/fiograph.py
index b5669a2d..384decda 100755
--- a/tools/fiograph/fiograph.py
+++ b/tools/fiograph/fiograph.py
@@ -218,7 +218,7 @@ def fio_to_graphviz(filename, format):
     # The first job will be a new execution group
     new_execution_group = True
 
-    # Let's interate on all sections to create links between them
+    # Let's iterate on all sections to create links between them
     for section_name in fio_file.sections():
         # The current section
         section = fio_file[section_name]
diff --git a/tools/genfio b/tools/genfio
index 8518bbcc..c9bc2f76 100755
--- a/tools/genfio
+++ b/tools/genfio
@@ -22,7 +22,8 @@
 BLK_SIZE=
 BLOCK_SIZE=4k
 SEQ=-1
-TEMPLATE=/tmp/template.fio
+TEMPLATE=$(mktemp "${TMPDIR:-${TEMP:-/tmp}}/template.fio.XXXXXX") || exit $?
+trap 'rm -f "$TEMPLATE"' EXIT
 OUTFILE=
 DISKS=
 PRINTABLE_DISKS=
@@ -48,7 +49,7 @@ show_help() {
 					one test after another then one disk after another
 					Disabled by default
 -p				: Run parallel test
-					one test after anoter but all disks at the same time
+					one test after another but all disks at the same time
 					Enabled by default
 -D iodepth			: Run with the specified iodepth
 					Default is $IODEPTH
diff --git a/tools/hist/fio-histo-log-pctiles.py b/tools/hist/fio-histo-log-pctiles.py
index 08e7722d..b5d167de 100755
--- a/tools/hist/fio-histo-log-pctiles.py
+++ b/tools/hist/fio-histo-log-pctiles.py
@@ -748,7 +748,7 @@ if unittest2_imported:
     def test_e2_get_pctiles_highest_pct(self):
         fio_v3_bucket_count = 29 * 64
         with open(self.fn, 'w') as f:
-            # make a empty fio v3 histogram
+            # make an empty fio v3 histogram
             buckets = [ 0 for j in range(0, fio_v3_bucket_count) ]
             # add one I/O request to last bucket
             buckets[-1] = 1
diff --git a/tools/plot/fio2gnuplot b/tools/plot/fio2gnuplot
index d2dc81df..ce3ca2cc 100755
--- a/tools/plot/fio2gnuplot
+++ b/tools/plot/fio2gnuplot
@@ -492,8 +492,8 @@ def main(argv):
     #We need to adjust the output filename regarding the pattern required by the user
     if (pattern_set_by_user == True):
         gnuplot_output_filename=pattern
-        # As we do have some glob in the pattern, let's make this simpliest
-        # We do remove the simpliest parts of the expression to get a clear file name
+        # As we do have some glob in the pattern, let's make this simplest
+        # We do remove the simplest parts of the expression to get a clear file name
         gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
         gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
         gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
diff --git a/tools/plot/fio2gnuplot.1 b/tools/plot/fio2gnuplot.1
index 6fb1283f..bfa10d26 100644
--- a/tools/plot/fio2gnuplot.1
+++ b/tools/plot/fio2gnuplot.1
@@ -35,7 +35,7 @@ The resulting graph helps at understanding trends.
 .TP
 .B
 Grouped 2D graph
-All files are plotted in a single image to ease the comparaison. The same rendering options as per the individual 2D graph are used :
+All files are plotted in a single image to ease the comparison. The same rendering options as per the individual 2D graph are used :
 .RS
 .IP \(bu 3
 raw
diff --git a/tools/plot/fio2gnuplot.manpage b/tools/plot/fio2gnuplot.manpage
index 6a12cf81..be3f13c2 100644
--- a/tools/plot/fio2gnuplot.manpage
+++ b/tools/plot/fio2gnuplot.manpage
@@ -20,7 +20,7 @@ DESCRIPTION
                     	The resulting graph helps at understanding trends.
 
  Grouped 2D graph   
-	All files are plotted in a single image to ease the comparaison. The same rendering options as per the individual 2D graph are used :
+	All files are plotted in a single image to ease the comparison. The same rendering options as per the individual 2D graph are used :
          - raw
          - smooth
          - trend

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-19 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-19 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit c99c81adb3510a8dc34d47fd40b19ef657e32192:

  Correct F_FULLSYNC -> F_FULLFSYNC (2022-02-17 12:53:59 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 933651ec130ce4d27a5c249d649d20afeb2bdf38:

  Merge branch 'rpma-update-RPMA-engines-with-new-librpma-completions-API' of https://github.com/ldorau/fio (2022-02-18 09:02:03 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      Merge branch 'rpma-update-RPMA-engines-with-new-librpma-completions-API' of https://github.com/ldorau/fio

Lukasz Dorau (1):
      rpma: RPMA engines require librpma>=v0.11.0 with rpma_cq_get_wc()

Oksana Salyk (1):
      rpma: update RPMA engines with new librpma completions API

 configure               |  4 ++--
 engines/librpma_apm.c   |  8 +++-----
 engines/librpma_fio.c   | 46 +++++++++++++++++++++++++++++-----------------
 engines/librpma_fio.h   | 16 +++++++++-------
 engines/librpma_gpspm.c | 39 ++++++++++++++++++---------------------
 5 files changed, 61 insertions(+), 52 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 6160d84d..be4605f9 100755
--- a/configure
+++ b/configure
@@ -974,7 +974,7 @@ print_config "rdmacm" "$rdmacm"
 
 ##########################################
 # librpma probe
-# The librpma engine requires librpma>=v0.10.0 with rpma_mr_advise().
+# The librpma engines require librpma>=v0.11.0 with rpma_cq_get_wc().
 if test "$librpma" != "yes" ; then
   librpma="no"
 fi
@@ -982,7 +982,7 @@ cat > $TMPC << EOF
 #include <librpma.h>
 int main(void)
 {
-  void *ptr = rpma_mr_advise;
+  void *ptr = rpma_cq_get_wc;
   (void) ptr; /* unused */
   return 0;
 }
diff --git a/engines/librpma_apm.c b/engines/librpma_apm.c
index ffa3769d..d1166ad8 100644
--- a/engines/librpma_apm.c
+++ b/engines/librpma_apm.c
@@ -22,8 +22,7 @@ static inline int client_io_flush(struct thread_data *td,
 		struct io_u *first_io_u, struct io_u *last_io_u,
 		unsigned long long int len);
 
-static int client_get_io_u_index(struct rpma_completion *cmpl,
-		unsigned int *io_u_index);
+static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index);
 
 static int client_init(struct thread_data *td)
 {
@@ -188,10 +187,9 @@ static inline int client_io_flush(struct thread_data *td,
 	return 0;
 }
 
-static int client_get_io_u_index(struct rpma_completion *cmpl,
-		unsigned int *io_u_index)
+static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index)
 {
-	memcpy(io_u_index, &cmpl->op_context, sizeof(*io_u_index));
+	memcpy(io_u_index, &wc->wr_id, sizeof(*io_u_index));
 
 	return 1;
 }
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index 9d6ebf38..dfd82180 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -302,6 +302,12 @@ int librpma_fio_client_init(struct thread_data *td,
 	if (ccd->conn == NULL)
 		goto err_peer_delete;
 
+	/* get the connection's main CQ */
+	if ((ret = rpma_conn_get_cq(ccd->conn, &ccd->cq))) {
+		librpma_td_verror(td, ret, "rpma_conn_get_cq");
+		goto err_conn_delete;
+	}
+
 	/* get the connection's private data sent from the server */
 	if ((ret = rpma_conn_get_private_data(ccd->conn, &pdata))) {
 		librpma_td_verror(td, ret, "rpma_conn_get_private_data");
@@ -455,7 +461,7 @@ static enum fio_q_status client_queue_sync(struct thread_data *td,
 		struct io_u *io_u)
 {
 	struct librpma_fio_client_data *ccd = td->io_ops_data;
-	struct rpma_completion cmpl;
+	struct ibv_wc wc;
 	unsigned io_u_index;
 	int ret;
 
@@ -478,31 +484,31 @@ static enum fio_q_status client_queue_sync(struct thread_data *td,
 
 	do {
 		/* get a completion */
-		ret = rpma_conn_completion_get(ccd->conn, &cmpl);
+		ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL);
 		if (ret == RPMA_E_NO_COMPLETION) {
 			/* lack of completion is not an error */
 			continue;
 		} else if (ret != 0) {
 			/* an error occurred */
-			librpma_td_verror(td, ret, "rpma_conn_completion_get");
+			librpma_td_verror(td, ret, "rpma_cq_get_wc");
 			goto err;
 		}
 
 		/* if io_us has completed with an error */
-		if (cmpl.op_status != IBV_WC_SUCCESS)
+		if (wc.status != IBV_WC_SUCCESS)
 			goto err;
 
-		if (cmpl.op == RPMA_OP_SEND)
+		if (wc.opcode == IBV_WC_SEND)
 			++ccd->op_send_completed;
 		else {
-			if (cmpl.op == RPMA_OP_RECV)
+			if (wc.opcode == IBV_WC_RECV)
 				++ccd->op_recv_completed;
 
 			break;
 		}
 	} while (1);
 
-	if (ccd->get_io_u_index(&cmpl, &io_u_index) != 1)
+	if (ccd->get_io_u_index(&wc, &io_u_index) != 1)
 		goto err;
 
 	if (io_u->index != io_u_index) {
@@ -654,8 +660,8 @@ int librpma_fio_client_commit(struct thread_data *td)
 static int client_getevent_process(struct thread_data *td)
 {
 	struct librpma_fio_client_data *ccd = td->io_ops_data;
-	struct rpma_completion cmpl;
-	/* io_u->index of completed io_u (cmpl.op_context) */
+	struct ibv_wc wc;
+	/* io_u->index of completed io_u (wc.wr_id) */
 	unsigned int io_u_index;
 	/* # of completed io_us */
 	int cmpl_num = 0;
@@ -665,7 +671,7 @@ static int client_getevent_process(struct thread_data *td)
 	int ret;
 
 	/* get a completion */
-	if ((ret = rpma_conn_completion_get(ccd->conn, &cmpl))) {
+	if ((ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL))) {
 		/* lack of completion is not an error */
 		if (ret == RPMA_E_NO_COMPLETION) {
 			/* lack of completion is not an error */
@@ -673,22 +679,22 @@ static int client_getevent_process(struct thread_data *td)
 		}
 
 		/* an error occurred */
-		librpma_td_verror(td, ret, "rpma_conn_completion_get");
+		librpma_td_verror(td, ret, "rpma_cq_get_wc");
 		return -1;
 	}
 
 	/* if io_us has completed with an error */
-	if (cmpl.op_status != IBV_WC_SUCCESS) {
-		td->error = cmpl.op_status;
+	if (wc.status != IBV_WC_SUCCESS) {
+		td->error = wc.status;
 		return -1;
 	}
 
-	if (cmpl.op == RPMA_OP_SEND)
+	if (wc.opcode == IBV_WC_SEND)
 		++ccd->op_send_completed;
-	else if (cmpl.op == RPMA_OP_RECV)
+	else if (wc.opcode == IBV_WC_RECV)
 		++ccd->op_recv_completed;
 
-	if ((ret = ccd->get_io_u_index(&cmpl, &io_u_index)) != 1)
+	if ((ret = ccd->get_io_u_index(&wc, &io_u_index)) != 1)
 		return ret;
 
 	/* look for an io_u being completed */
@@ -750,7 +756,7 @@ int librpma_fio_client_getevents(struct thread_data *td, unsigned int min,
 
 			/*
 			 * To reduce CPU consumption one can use
-			 * the rpma_conn_completion_wait() function.
+			 * the rpma_cq_wait() function.
 			 * Note this greatly increase the latency
 			 * and make the results less stable.
 			 * The bandwidth stays more or less the same.
@@ -1029,6 +1035,12 @@ int librpma_fio_server_open_file(struct thread_data *td, struct fio_file *f,
 	csd->ws_ptr = ws_ptr;
 	csd->conn = conn;
 
+	/* get the connection's main CQ */
+	if ((ret = rpma_conn_get_cq(csd->conn, &csd->cq))) {
+		librpma_td_verror(td, ret, "rpma_conn_get_cq");
+		goto err_conn_delete;
+	}
+
 	return 0;
 
 err_conn_delete:
diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h
index 2c507e9c..91290235 100644
--- a/engines/librpma_fio.h
+++ b/engines/librpma_fio.h
@@ -94,12 +94,13 @@ typedef int (*librpma_fio_flush_t)(struct thread_data *td,
  * - ( 0) - skip
  * - (-1) - on error
  */
-typedef int (*librpma_fio_get_io_u_index_t)(struct rpma_completion *cmpl,
+typedef int (*librpma_fio_get_io_u_index_t)(struct ibv_wc *wc,
 		unsigned int *io_u_index);
 
 struct librpma_fio_client_data {
 	struct rpma_peer *peer;
 	struct rpma_conn *conn;
+	struct rpma_cq *cq;
 
 	/* aligned td->orig_buffer */
 	char *orig_buffer_aligned;
@@ -199,29 +200,29 @@ static inline int librpma_fio_client_io_complete_all_sends(
 		struct thread_data *td)
 {
 	struct librpma_fio_client_data *ccd = td->io_ops_data;
-	struct rpma_completion cmpl;
+	struct ibv_wc wc;
 	int ret;
 
 	while (ccd->op_send_posted != ccd->op_send_completed) {
 		/* get a completion */
-		ret = rpma_conn_completion_get(ccd->conn, &cmpl);
+		ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL);
 		if (ret == RPMA_E_NO_COMPLETION) {
 			/* lack of completion is not an error */
 			continue;
 		} else if (ret != 0) {
 			/* an error occurred */
-			librpma_td_verror(td, ret, "rpma_conn_completion_get");
+			librpma_td_verror(td, ret, "rpma_cq_get_wc");
 			break;
 		}
 
-		if (cmpl.op_status != IBV_WC_SUCCESS)
+		if (wc.status != IBV_WC_SUCCESS)
 			return -1;
 
-		if (cmpl.op == RPMA_OP_SEND)
+		if (wc.opcode == IBV_WC_SEND)
 			++ccd->op_send_completed;
 		else {
 			log_err(
-				"A completion other than RPMA_OP_SEND got during cleaning up the CQ from SENDs\n");
+				"A completion other than IBV_WC_SEND got during cleaning up the CQ from SENDs\n");
 			return -1;
 		}
 	}
@@ -251,6 +252,7 @@ struct librpma_fio_server_data {
 
 	/* resources of an incoming connection */
 	struct rpma_conn *conn;
+	struct rpma_cq *cq;
 
 	char *ws_ptr;
 	struct rpma_mr_local *ws_mr;
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index 74147709..14626e7f 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -60,8 +60,7 @@ static inline int client_io_flush(struct thread_data *td,
 		struct io_u *first_io_u, struct io_u *last_io_u,
 		unsigned long long int len);
 
-static int client_get_io_u_index(struct rpma_completion *cmpl,
-		unsigned int *io_u_index);
+static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index);
 
 static int client_init(struct thread_data *td)
 {
@@ -317,17 +316,16 @@ static inline int client_io_flush(struct thread_data *td,
 	return 0;
 }
 
-static int client_get_io_u_index(struct rpma_completion *cmpl,
-		unsigned int *io_u_index)
+static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index)
 {
 	GPSPMFlushResponse *flush_resp;
 
-	if (cmpl->op != RPMA_OP_RECV)
+	if (wc->opcode != IBV_WC_RECV)
 		return 0;
 
 	/* unpack a response from the received buffer */
 	flush_resp = gpspm_flush_response__unpack(NULL,
-			cmpl->byte_len, cmpl->op_context);
+			wc->byte_len, (void *)wc->wr_id);
 	if (flush_resp == NULL) {
 		log_err("Cannot unpack the flush response buffer\n");
 		return -1;
@@ -373,7 +371,7 @@ struct server_data {
 	uint32_t msg_sqe_available; /* # of free SQ slots */
 
 	/* in-memory queues */
-	struct rpma_completion *msgs_queued;
+	struct ibv_wc *msgs_queued;
 	uint32_t msg_queued_nr;
 };
 
@@ -562,8 +560,7 @@ err_cfg_delete:
 	return ret;
 }
 
-static int server_qe_process(struct thread_data *td,
-		struct rpma_completion *cmpl)
+static int server_qe_process(struct thread_data *td, struct ibv_wc *wc)
 {
 	struct librpma_fio_server_data *csd = td->io_ops_data;
 	struct server_data *sd = csd->server_data;
@@ -580,7 +577,7 @@ static int server_qe_process(struct thread_data *td,
 	int ret;
 
 	/* calculate SEND/RECV pair parameters */
-	msg_index = (int)(uintptr_t)cmpl->op_context;
+	msg_index = (int)(uintptr_t)wc->wr_id;
 	io_u_buff_offset = IO_U_BUFF_OFF_SERVER(msg_index);
 	send_buff_offset = io_u_buff_offset + SEND_OFFSET;
 	recv_buff_offset = io_u_buff_offset + RECV_OFFSET;
@@ -588,7 +585,7 @@ static int server_qe_process(struct thread_data *td,
 	recv_buff_ptr = sd->orig_buffer_aligned + recv_buff_offset;
 
 	/* unpack a flush request from the received buffer */
-	flush_req = gpspm_flush_request__unpack(NULL, cmpl->byte_len,
+	flush_req = gpspm_flush_request__unpack(NULL, wc->byte_len,
 			recv_buff_ptr);
 	if (flush_req == NULL) {
 		log_err("cannot unpack the flush request buffer\n");
@@ -682,28 +679,28 @@ static int server_cmpl_process(struct thread_data *td)
 {
 	struct librpma_fio_server_data *csd = td->io_ops_data;
 	struct server_data *sd = csd->server_data;
-	struct rpma_completion *cmpl = &sd->msgs_queued[sd->msg_queued_nr];
+	struct ibv_wc *wc = &sd->msgs_queued[sd->msg_queued_nr];
 	struct librpma_fio_options_values *o = td->eo;
 	int ret;
 
-	ret = rpma_conn_completion_get(csd->conn, cmpl);
+	ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
 	if (ret == RPMA_E_NO_COMPLETION) {
 		if (o->busy_wait_polling == 0) {
-			ret = rpma_conn_completion_wait(csd->conn);
+			ret = rpma_cq_wait(csd->cq);
 			if (ret == RPMA_E_NO_COMPLETION) {
 				/* lack of completion is not an error */
 				return 0;
 			} else if (ret != 0) {
-				librpma_td_verror(td, ret, "rpma_conn_completion_wait");
+				librpma_td_verror(td, ret, "rpma_cq_wait");
 				goto err_terminate;
 			}
 
-			ret = rpma_conn_completion_get(csd->conn, cmpl);
+			ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
 			if (ret == RPMA_E_NO_COMPLETION) {
 				/* lack of completion is not an error */
 				return 0;
 			} else if (ret != 0) {
-				librpma_td_verror(td, ret, "rpma_conn_completion_get");
+				librpma_td_verror(td, ret, "rpma_cq_get_wc");
 				goto err_terminate;
 			}
 		} else {
@@ -711,17 +708,17 @@ static int server_cmpl_process(struct thread_data *td)
 			return 0;
 		}
 	} else if (ret != 0) {
-		librpma_td_verror(td, ret, "rpma_conn_completion_get");
+		librpma_td_verror(td, ret, "rpma_cq_get_wc");
 		goto err_terminate;
 	}
 
 	/* validate the completion */
-	if (cmpl->op_status != IBV_WC_SUCCESS)
+	if (wc->status != IBV_WC_SUCCESS)
 		goto err_terminate;
 
-	if (cmpl->op == RPMA_OP_RECV)
+	if (wc->opcode == IBV_WC_RECV)
 		++sd->msg_queued_nr;
-	else if (cmpl->op == RPMA_OP_SEND)
+	else if (wc->opcode == IBV_WC_SEND)
 		++sd->msg_sqe_available;
 
 	return 0;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-18 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-18 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 6a16e9e9531a5f746c4e2fe43873de1db434b4fc:

  diskutil: include limits.h for PATH_MAX (2022-02-15 17:17:30 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to c99c81adb3510a8dc34d47fd40b19ef657e32192:

  Correct F_FULLSYNC -> F_FULLFSYNC (2022-02-17 12:53:59 -0700)

----------------------------------------------------------------
Jens Axboe (4):
      t/io_uring: allow non-power-of-2 queue depths
      t/io_uring: align buffers correctly on non-4k page sizes
      Use fcntl(..., F_FULLSYNC) if available
      Correct F_FULLSYNC -> F_FULLFSYNC

 configure    | 22 ++++++++++++++++++++++
 io_u.c       |  4 ++++
 t/io_uring.c | 15 ++++++++++-----
 3 files changed, 36 insertions(+), 5 deletions(-)

---

Diff of recent changes:

diff --git a/configure b/configure
index 0efde7d6..6160d84d 100755
--- a/configure
+++ b/configure
@@ -645,6 +645,25 @@ if compile_prog "" "-lz" "zlib" ; then
 fi
 print_config "zlib" "$zlib"
 
+##########################################
+# fcntl(F_FULLFSYNC) support
+if test "$fcntl_sync" != "yes" ; then
+  fcntl_sync="no"
+fi
+cat > $TMPC << EOF
+#include <unistd.h>
+#include <fcntl.h>
+
+int main(int argc, char **argv)
+{
+  return fcntl(0, F_FULLFSYNC);
+}
+EOF
+if compile_prog "" "" "fcntl(F_FULLFSYNC)" ; then
+    fcntl_sync="yes"
+fi
+print_config "fcntl(F_FULLFSYNC)" "$fcntl_sync"
+
 ##########################################
 # linux-aio probe
 if test "$libaio" != "yes" ; then
@@ -3174,6 +3193,9 @@ fi
 if test "$pdb" = yes; then
   output_sym "CONFIG_PDB"
 fi
+if test "$fcntl_sync" = "yes" ; then
+  output_sym "CONFIG_FCNTL_SYNC"
+fi
 
 print_config "Lib-based ioengines dynamic" "$dynamic_engines"
 cat > $TMPC << EOF
diff --git a/io_u.c b/io_u.c
index 059637e5..806ceb77 100644
--- a/io_u.c
+++ b/io_u.c
@@ -2297,7 +2297,11 @@ int do_io_u_sync(const struct thread_data *td, struct io_u *io_u)
 	int ret;
 
 	if (io_u->ddir == DDIR_SYNC) {
+#ifdef CONFIG_FCNTL_SYNC
+		ret = fcntl(io_u->file->fd, F_FULLFSYNC);
+#else
 		ret = fsync(io_u->file->fd);
+#endif
 	} else if (io_u->ddir == DDIR_DATASYNC) {
 #ifdef CONFIG_FDATASYNC
 		ret = fdatasync(io_u->file->fd);
diff --git a/t/io_uring.c b/t/io_uring.c
index 4520de43..f513d7dc 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -364,7 +364,7 @@ static int io_uring_register_buffers(struct submitter *s)
 		return 0;
 
 	return syscall(__NR_io_uring_register, s->ring_fd,
-			IORING_REGISTER_BUFFERS, s->iovecs, depth);
+			IORING_REGISTER_BUFFERS, s->iovecs, roundup_pow2(depth));
 }
 
 static int io_uring_register_files(struct submitter *s)
@@ -962,7 +962,7 @@ static int setup_aio(struct submitter *s)
 		fixedbufs = register_files = 0;
 	}
 
-	return io_queue_init(depth, &s->aio_ctx);
+	return io_queue_init(roundup_pow2(depth), &s->aio_ctx);
 #else
 	fprintf(stderr, "Legacy AIO not available on this system/build\n");
 	errno = EINVAL;
@@ -1156,6 +1156,7 @@ int main(int argc, char *argv[])
 	struct submitter *s;
 	unsigned long done, calls, reap;
 	int err, i, j, flags, fd, opt, threads_per_f, threads_rem = 0, nfiles;
+	long page_size;
 	struct file f;
 	char *fdepths;
 	void *ret;
@@ -1249,7 +1250,7 @@ int main(int argc, char *argv[])
 		dma_map = 0;
 
 	submitter = calloc(nthreads, sizeof(*submitter) +
-				depth * sizeof(struct iovec));
+				roundup_pow2(depth) * sizeof(struct iovec));
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
 		s->index = j;
@@ -1319,12 +1320,16 @@ int main(int argc, char *argv[])
 
 	arm_sig_int();
 
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 0)
+		page_size = 4096;
+
 	for (j = 0; j < nthreads; j++) {
 		s = get_submitter(j);
-		for (i = 0; i < depth; i++) {
+		for (i = 0; i < roundup_pow2(depth); i++) {
 			void *buf;
 
-			if (posix_memalign(&buf, bs, bs)) {
+			if (posix_memalign(&buf, page_size, bs)) {
 				printf("failed alloc\n");
 				return 1;
 			}

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-16 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-16 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit a1db4528a59a99c5e2aa66091c505fb60e3a70ca:

  Merge branch 'fio-docs-ci' of https://github.com/vincentkfu/fio (2022-02-11 16:29:44 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 6a16e9e9531a5f746c4e2fe43873de1db434b4fc:

  diskutil: include limits.h for PATH_MAX (2022-02-15 17:17:30 -0700)

----------------------------------------------------------------
Jens Axboe (4):
      Merge branch 'fix_bytesrate_eta' of https://github.com/PCPartPicker/fio
      Merge branch 'rand_nr_bugfix' of https://github.com/PCPartPicker/fio
      Merge branch 'check_min_rate_cleanup' of https://github.com/PCPartPicker/fio
      diskutil: include limits.h for PATH_MAX

Vincent Fu (1):
      ci: detect Windows installer build failures

aggieNick02 (3):
      Cleanup __check_min_rate
      Fix ETA display when rate and/or rate_min are specified
      Fix :<nr> suffix with random read/write causing 0 initial offset

 .appveyor.yml |  1 +
 backend.c     | 81 ++++++++++++++++++++---------------------------------------
 diskutil.h    |  2 ++
 eta.c         |  5 ++--
 fio.h         |  6 ++---
 init.c        |  9 ++++++-
 libfio.c      |  4 +--
 7 files changed, 46 insertions(+), 62 deletions(-)

---

Diff of recent changes:

diff --git a/.appveyor.yml b/.appveyor.yml
index 42b79958..b94eefe3 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -44,6 +44,7 @@ after_build:
   - file.exe fio.exe
   - make.exe test
   - 'cd os\windows && dobuild.cmd %ARCHITECTURE% && cd ..'
+  - ls.exe ./os/windows/*.msi
   - ps: Get-ChildItem .\os\windows\*.msi | % { Push-AppveyorArtifact $_.FullName -FileName $_.Name -DeploymentName fio.msi }
 
 test_script:
diff --git a/backend.c b/backend.c
index c035baed..a21dfef6 100644
--- a/backend.c
+++ b/backend.c
@@ -136,13 +136,10 @@ static void set_sig_handlers(void)
 static bool __check_min_rate(struct thread_data *td, struct timespec *now,
 			     enum fio_ddir ddir)
 {
-	unsigned long long bytes = 0;
-	unsigned long iops = 0;
-	unsigned long spent;
-	unsigned long long rate;
-	unsigned long long ratemin = 0;
-	unsigned int rate_iops = 0;
-	unsigned int rate_iops_min = 0;
+	unsigned long long current_rate_check_bytes = td->this_io_bytes[ddir];
+	unsigned long current_rate_check_blocks = td->this_io_blocks[ddir];
+	unsigned long long option_rate_bytes_min = td->o.ratemin[ddir];
+	unsigned int option_rate_iops_min = td->o.rate_iops_min[ddir];
 
 	assert(ddir_rw(ddir));
 
@@ -155,68 +152,44 @@ static bool __check_min_rate(struct thread_data *td, struct timespec *now,
 	if (mtime_since(&td->start, now) < 2000)
 		return false;
 
-	iops += td->this_io_blocks[ddir];
-	bytes += td->this_io_bytes[ddir];
-	ratemin += td->o.ratemin[ddir];
-	rate_iops += td->o.rate_iops[ddir];
-	rate_iops_min += td->o.rate_iops_min[ddir];
-
 	/*
-	 * if rate blocks is set, sample is running
+	 * if last_rate_check_blocks or last_rate_check_bytes is set,
+	 * we can compute a rate per ratecycle
 	 */
-	if (td->rate_bytes[ddir] || td->rate_blocks[ddir]) {
-		spent = mtime_since(&td->lastrate[ddir], now);
-		if (spent < td->o.ratecycle)
+	if (td->last_rate_check_bytes[ddir] || td->last_rate_check_blocks[ddir]) {
+		unsigned long spent = mtime_since(&td->last_rate_check_time[ddir], now);
+		if (spent < td->o.ratecycle || spent==0)
 			return false;
 
-		if (td->o.rate[ddir] || td->o.ratemin[ddir]) {
+		if (td->o.ratemin[ddir]) {
 			/*
 			 * check bandwidth specified rate
 			 */
-			if (bytes < td->rate_bytes[ddir]) {
-				log_err("%s: rate_min=%lluB/s not met, only transferred %lluB\n",
-					td->o.name, ratemin, bytes);
+			unsigned long long current_rate_bytes =
+				((current_rate_check_bytes - td->last_rate_check_bytes[ddir]) * 1000) / spent;
+			if (current_rate_bytes < option_rate_bytes_min) {
+				log_err("%s: rate_min=%lluB/s not met, got %lluB/s\n",
+					td->o.name, option_rate_bytes_min, current_rate_bytes);
 				return true;
-			} else {
-				if (spent)
-					rate = ((bytes - td->rate_bytes[ddir]) * 1000) / spent;
-				else
-					rate = 0;
-
-				if (rate < ratemin ||
-				    bytes < td->rate_bytes[ddir]) {
-					log_err("%s: rate_min=%lluB/s not met, got %lluB/s\n",
-						td->o.name, ratemin, rate);
-					return true;
-				}
 			}
 		} else {
 			/*
 			 * checks iops specified rate
 			 */
-			if (iops < rate_iops) {
-				log_err("%s: rate_iops_min=%u not met, only performed %lu IOs\n",
-						td->o.name, rate_iops, iops);
+			unsigned long long current_rate_iops =
+				((current_rate_check_blocks - td->last_rate_check_blocks[ddir]) * 1000) / spent;
+
+			if (current_rate_iops < option_rate_iops_min) {
+				log_err("%s: rate_iops_min=%u not met, got %llu IOPS\n",
+					td->o.name, option_rate_iops_min, current_rate_iops);
 				return true;
-			} else {
-				if (spent)
-					rate = ((iops - td->rate_blocks[ddir]) * 1000) / spent;
-				else
-					rate = 0;
-
-				if (rate < rate_iops_min ||
-				    iops < td->rate_blocks[ddir]) {
-					log_err("%s: rate_iops_min=%u not met, got %llu IOPS\n",
-						td->o.name, rate_iops_min, rate);
-					return true;
-				}
 			}
 		}
 	}
 
-	td->rate_bytes[ddir] = bytes;
-	td->rate_blocks[ddir] = iops;
-	memcpy(&td->lastrate[ddir], now, sizeof(*now));
+	td->last_rate_check_bytes[ddir] = current_rate_check_bytes;
+	td->last_rate_check_blocks[ddir] = current_rate_check_blocks;
+	memcpy(&td->last_rate_check_time[ddir], now, sizeof(*now));
 	return false;
 }
 
@@ -1845,11 +1818,11 @@ static void *thread_main(void *data)
 
 	if (o->ratemin[DDIR_READ] || o->ratemin[DDIR_WRITE] ||
 			o->ratemin[DDIR_TRIM]) {
-	        memcpy(&td->lastrate[DDIR_READ], &td->bw_sample_time,
+	        memcpy(&td->last_rate_check_time[DDIR_READ], &td->bw_sample_time,
 					sizeof(td->bw_sample_time));
-	        memcpy(&td->lastrate[DDIR_WRITE], &td->bw_sample_time,
+	        memcpy(&td->last_rate_check_time[DDIR_WRITE], &td->bw_sample_time,
 					sizeof(td->bw_sample_time));
-	        memcpy(&td->lastrate[DDIR_TRIM], &td->bw_sample_time,
+	        memcpy(&td->last_rate_check_time[DDIR_TRIM], &td->bw_sample_time,
 					sizeof(td->bw_sample_time));
 	}
 
diff --git a/diskutil.h b/diskutil.h
index 83bcbf89..7d7ef802 100644
--- a/diskutil.h
+++ b/diskutil.h
@@ -2,6 +2,8 @@
 #define FIO_DISKUTIL_H
 #define FIO_DU_NAME_SZ		64
 
+#include <limits.h>
+
 #include "helper_thread.h"
 #include "fio_sem.h"
 
diff --git a/eta.c b/eta.c
index ea1781f3..17970c78 100644
--- a/eta.c
+++ b/eta.c
@@ -420,6 +420,7 @@ bool calc_thread_status(struct jobs_eta *je, int force)
 		if (is_power_of_2(td->o.kb_base))
 			je->is_pow2 = 1;
 		je->unit_base = td->o.unit_base;
+		je->sig_figs = td->o.sig_figs;
 		if (td->o.bw_avg_time < bw_avg_time)
 			bw_avg_time = td->o.bw_avg_time;
 		if (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING
@@ -600,9 +601,9 @@ void display_thread_status(struct jobs_eta *je)
 		char *tr, *mr;
 
 		mr = num2str(je->m_rate[0] + je->m_rate[1] + je->m_rate[2],
-				je->sig_figs, 0, je->is_pow2, N2S_BYTEPERSEC);
+				je->sig_figs, 1, je->is_pow2, N2S_BYTEPERSEC);
 		tr = num2str(je->t_rate[0] + je->t_rate[1] + je->t_rate[2],
-				je->sig_figs, 0, je->is_pow2, N2S_BYTEPERSEC);
+				je->sig_figs, 1, je->is_pow2, N2S_BYTEPERSEC);
 
 		p += sprintf(p, ", %s-%s", mr, tr);
 		free(tr);
diff --git a/fio.h b/fio.h
index 7b0ca843..88df117d 100644
--- a/fio.h
+++ b/fio.h
@@ -335,10 +335,10 @@ struct thread_data {
 	 */
 	uint64_t rate_bps[DDIR_RWDIR_CNT];
 	uint64_t rate_next_io_time[DDIR_RWDIR_CNT];
-	unsigned long long rate_bytes[DDIR_RWDIR_CNT];
-	unsigned long rate_blocks[DDIR_RWDIR_CNT];
+	unsigned long long last_rate_check_bytes[DDIR_RWDIR_CNT];
+	unsigned long last_rate_check_blocks[DDIR_RWDIR_CNT];
 	unsigned long long rate_io_issue_bytes[DDIR_RWDIR_CNT];
-	struct timespec lastrate[DDIR_RWDIR_CNT];
+	struct timespec last_rate_check_time[DDIR_RWDIR_CNT];
 	int64_t last_usec[DDIR_RWDIR_CNT];
 	struct frand_state poisson_state[DDIR_RWDIR_CNT];
 
diff --git a/init.c b/init.c
index 13935152..81c30f8c 100644
--- a/init.c
+++ b/init.c
@@ -1576,7 +1576,14 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 	td->ts.sig_figs = o->sig_figs;
 
 	init_thread_stat_min_vals(&td->ts);
-	td->ddir_seq_nr = o->ddir_seq_nr;
+
+	/*
+	 * td->ddir_seq_nr needs to be initialized to 1, NOT o->ddir_seq_nr,
+	 * so that get_next_offset gets a new random offset the first time it
+	 * is called, instead of keeping an initial offset of 0 for the first
+	 * nr-1 calls
+	 */
+	td->ddir_seq_nr = 1;
 
 	if ((o->stonewall || o->new_group) && prev_group_jobs) {
 		prev_group_jobs = 0;
diff --git a/libfio.c b/libfio.c
index 01fa7452..1a891776 100644
--- a/libfio.c
+++ b/libfio.c
@@ -87,8 +87,8 @@ static void reset_io_counters(struct thread_data *td, int all)
 			td->this_io_bytes[ddir] = 0;
 			td->stat_io_blocks[ddir] = 0;
 			td->this_io_blocks[ddir] = 0;
-			td->rate_bytes[ddir] = 0;
-			td->rate_blocks[ddir] = 0;
+			td->last_rate_check_bytes[ddir] = 0;
+			td->last_rate_check_blocks[ddir] = 0;
 			td->bytes_done[ddir] = 0;
 			td->rate_io_issue_bytes[ddir] = 0;
 			td->rate_next_io_time[ddir] = 0;

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-12 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-12 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit df597be63e26ef59c1538b3ce2026c83684ff7fb:

  fio: really use LDFLAGS when linking dynamic engines (2022-02-08 09:28:30 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to a1db4528a59a99c5e2aa66091c505fb60e3a70ca:

  Merge branch 'fio-docs-ci' of https://github.com/vincentkfu/fio (2022-02-11 16:29:44 -0700)

----------------------------------------------------------------
Jens Axboe (4):
      t/io_uring: avoid unused `nr_batch` warning
      Add aarch64 cpu clock support
      Merge branch 'fio_offload_fixes' of https://github.com/PCPartPicker/fio
      Merge branch 'fio-docs-ci' of https://github.com/vincentkfu/fio

Vincent Fu (8):
      docs: document cpumode option for the cpuio ioengine
      docs: update Makefile in order to detect build failures
      docs: rename HOWTO to HOWTO.rst
      HOWTO: combine multiple pool option listings
      HOWTO: combine separate hipri listings into a single one
      HOWTO: combine two chunk_size listings into a single one
      ci: install sphinx packages and add doc building to GitHub Actions
      windows: update the installer build for renamed files

aggieNick02 (1):
      Fix issues (assert or uninit var, hang) with check_min_rate and offloading

 HOWTO => HOWTO.rst      | 126 ++++++++++++++++++++++++++++--------------------
 arch/arch-aarch64.h     |  17 +++++++
 backend.c               |   9 +++-
 ci/actions-full-test.sh |   1 +
 ci/actions-install.sh   |   3 +-
 doc/Makefile            |   2 +-
 doc/fio_doc.rst         |   2 +-
 doc/fio_man.rst         |   2 +-
 fio.1                   |  13 +++++
 os/windows/install.wxs  |   4 +-
 t/io_uring.c            |   9 ++--
 11 files changed, 124 insertions(+), 64 deletions(-)
 rename HOWTO => HOWTO.rst (99%)

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO.rst
similarity index 99%
rename from HOWTO
rename to HOWTO.rst
index 74ba7216..ac1f3478 100644
--- a/HOWTO
+++ b/HOWTO.rst
@@ -2137,8 +2137,10 @@ I/O engine
 			Asynchronous read and write using DDN's Infinite Memory Engine (IME).
 			This engine will try to stack as much IOs as possible by creating
 			requests for IME. FIO will then decide when to commit these requests.
+
 		**libiscsi**
 			Read and write iscsi lun with libiscsi.
+
 		**nbd**
 			Read and write a Network Block Device (NBD).
 
@@ -2149,6 +2151,7 @@ I/O engine
 			unless :option:`verify` is set or :option:`cuda_io` is `posix`.
 			:option:`iomem` must not be `cudamalloc`. This ioengine defines
 			engine specific options.
+
 		**dfs**
 			I/O engine supporting asynchronous read and write operations to the
 			DAOS File System (DFS) via libdfs.
@@ -2175,8 +2178,8 @@ with the caveat that when used on the command line, they must come after the
     Set the percentage of I/O that will be issued with the highest priority.
     Default: 0. A single value applies to reads and writes. Comma-separated
     values may be specified for reads and writes. For this option to be
-    effective, NCQ priority must be supported and enabled, and `direct=1'
-    option must be used. fio must also be run as the root user. Unlike
+    effective, NCQ priority must be supported and enabled, and the :option:`direct`
+    option must be set. fio must also be run as the root user. Unlike
     slat/clat/lat stats, which can be tracked and reported independently, per
     priority stats only track and report a single type of latency. By default,
     completion latency (clat) will be reported, if :option:`lat_percentiles` is
@@ -2207,6 +2210,7 @@ with the caveat that when used on the command line, they must come after the
 	meaning of priority may differ. See also the :option:`prio` option.
 
 .. option:: cmdprio_bssplit=str[,str] : [io_uring] [libaio]
+
 	To get a finer control over I/O priority, this option allows
 	specifying the percentage of IOs that must have a priority set
 	depending on the block size of the IO. This option is useful only
@@ -2243,14 +2247,6 @@ with the caveat that when used on the command line, they must come after the
     map and release for each IO. This is more efficient, and reduces the
     IO latency as well.
 
-.. option:: hipri : [io_uring]
-
-    If this option is set, fio will attempt to use polled IO completions.
-    Normal IO completions generate interrupts to signal the completion of
-    IO, polled completions do not. Hence they are require active reaping
-    by the application. The benefits are more efficient IO for high IOPS
-    scenarios, and lower latencies for low queue depth IO.
-
 .. option:: registerfiles : [io_uring]
 
 	With this option, fio registers the set of files being used with the
@@ -2271,6 +2267,33 @@ with the caveat that when used on the command line, they must come after the
 	When :option:`sqthread_poll` is set, this option provides a way to
 	define which CPU should be used for the polling thread.
 
+.. option:: hipri
+
+   [io_uring]
+
+        If this option is set, fio will attempt to use polled IO completions.
+        Normal IO completions generate interrupts to signal the completion of
+        IO, polled completions do not. Hence they require active reaping
+        by the application. The benefits are more efficient IO for high IOPS
+        scenarios, and lower latencies for low queue depth IO.
+
+   [pvsync2]
+
+	Set RWF_HIPRI on I/O, indicating to the kernel that it's of higher priority
+	than normal.
+
+   [sg]
+
+	If this option is set, fio will attempt to use polled IO completions.
+	This will have a similar effect as (io_uring)hipri. Only SCSI READ and
+	WRITE commands will have the SGV4_FLAG_HIPRI set (not UNMAP (trim) nor
+	VERIFY). Older versions of the Linux sg driver that do not support
+	hipri will simply ignore this flag and do normal IO. The Linux SCSI
+	Low Level Driver (LLD) that "owns" the device also needs to support
+	hipri (also known as iopoll and mq_poll). The MegaRAID driver is an
+	example of a SCSI LLD. Default: clear (0) which does normal
+	(interrupt-based) IO.
+
 .. option:: userspace_reap : [libaio]
 
 	Normally, with the libaio engine in use, fio will use the
@@ -2279,11 +2302,6 @@ with the caveat that when used on the command line, they must come after the
 	reap events. The reaping mode is only enabled when polling for a minimum of
 	0 events (e.g. when :option:`iodepth_batch_complete` `=0`).
 
-.. option:: hipri : [pvsync2]
-
-	Set RWF_HIPRI on I/O, indicating to the kernel that it's of higher priority
-	than normal.
-
 .. option:: hipri_percentage : [pvsync2]
 
 	When hipri is set this determines the probability of a pvsync2 I/O being high
@@ -2318,6 +2336,16 @@ with the caveat that when used on the command line, they must come after the
 
 	Split the load into cycles of the given time. In microseconds.
 
+.. option:: cpumode=str : [cpuio]
+
+	Specify how to stress the CPU. It can take these two values:
+
+	**noop**
+		This is the default where the CPU executes noop instructions.
+	**qsort**
+		Replace the default noop instructions loop with a qsort algorithm to
+		consume more energy.
+
 .. option:: exit_on_io_done=bool : [cpuio]
 
 	Detect when I/O threads are done, then exit.
@@ -2444,10 +2472,6 @@ with the caveat that when used on the command line, they must come after the
 
 	Specifies the name of the RBD.
 
-.. option:: pool=str : [rbd,rados]
-
-	Specifies the name of the Ceph pool containing RBD or RADOS data.
-
 .. option:: clientname=str : [rbd,rados]
 
 	Specifies the username (without the 'client.' prefix) used to access the
@@ -2466,6 +2490,36 @@ with the caveat that when used on the command line, they must come after the
         Touching all objects affects ceph caches and likely impacts test results.
         Enabled by default.
 
+.. option:: pool=str :
+
+   [rbd,rados]
+
+	Specifies the name of the Ceph pool containing RBD or RADOS data.
+
+   [dfs]
+
+	Specify the label or UUID of the DAOS pool to connect to.
+
+.. option:: cont=str : [dfs]
+
+	Specify the label or UUID of the DAOS container to open.
+
+.. option:: chunk_size=int
+
+   [dfs]
+
+	Specify a different chunk size (in bytes) for the dfs file.
+	Use DAOS container's chunk size by default.
+
+   [libhdfs]
+
+	The size of the chunk to use for each file.
+
+.. option:: object_class=str : [dfs]
+
+	Specify a different object class for the dfs file.
+	Use DAOS container's object class by default.
+
 .. option:: skip_bad=bool : [mtd]
 
 	Skip operations against known bad blocks.
@@ -2474,10 +2528,6 @@ with the caveat that when used on the command line, they must come after the
 
 	libhdfs will create chunk in this HDFS directory.
 
-.. option:: chunk_size : [libhdfs]
-
-	The size of the chunk to use for each file.
-
 .. option:: verb=str : [rdma]
 
 	The RDMA verb to use on this side of the RDMA ioengine connection. Valid
@@ -2563,18 +2613,6 @@ with the caveat that when used on the command line, they must come after the
 	a valid stream identifier) fio will open a stream and then close it when done. Default
 	is 0.
 
-.. option:: hipri : [sg]
-
-	If this option is set, fio will attempt to use polled IO completions.
-	This will have a similar effect as (io_uring)hipri. Only SCSI READ and
-	WRITE commands will have the SGV4_FLAG_HIPRI set (not UNMAP (trim) nor
-	VERIFY). Older versions of the Linux sg driver that do not support
-	hipri will simply ignore this flag and do normal IO. The Linux SCSI
-	Low Level Driver (LLD) that "owns" the device also needs to support
-	hipri (also known as iopoll and mq_poll). The MegaRAID driver is an
-	example of a SCSI LLD. Default: clear (0) which does normal
-	(interrupted based) IO.
-
 .. option:: http_host=str : [http]
 
 	Hostname to connect to. For S3, this could be the bucket hostname.
@@ -2654,24 +2692,6 @@ with the caveat that when used on the command line, they must come after the
 		GPU to RAM before a write and copied from RAM to GPU after a
 		read. :option:`verify` does not affect use of cudaMemcpy.
 
-.. option:: pool=str : [dfs]
-
-	Specify the label or UUID of the DAOS pool to connect to.
-
-.. option:: cont=str : [dfs]
-
-	Specify the label or UUID of the DAOS container to open.
-
-.. option:: chunk_size=int : [dfs]
-
-	Specificy a different chunk size (in bytes) for the dfs file.
-	Use DAOS container's chunk size by default.
-
-.. option:: object_class=str : [dfs]
-
-	Specificy a different object class for the dfs file.
-	Use DAOS container's object class by default.
-
 .. option:: nfs_url=str : [nfs]
 
 	URL in libnfs format, eg nfs://<server|ipv4|ipv6>/path[?arg=val[&arg=val]*]
diff --git a/arch/arch-aarch64.h b/arch/arch-aarch64.h
index 2a86cc5a..94571709 100644
--- a/arch/arch-aarch64.h
+++ b/arch/arch-aarch64.h
@@ -27,4 +27,21 @@ static inline int arch_ffz(unsigned long bitmask)
 
 #define ARCH_HAVE_FFZ
 
+static inline unsigned long long get_cpu_clock(void)
+{
+	unsigned long val;
+
+	asm volatile("mrs %0, cntvct_el0" : "=r" (val));
+	return val;
+}
+#define ARCH_HAVE_CPU_CLOCK
+
+#define ARCH_HAVE_INIT
+extern bool tsc_reliable;
+static inline int arch_init(char *envp[])
+{
+	tsc_reliable = true;
+	return 0;
+}
+
 #endif
diff --git a/backend.c b/backend.c
index 061e3b32..c035baed 100644
--- a/backend.c
+++ b/backend.c
@@ -1091,8 +1091,10 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
 				td->rate_io_issue_bytes[__ddir] += blen;
 			}
 
-			if (should_check_rate(td))
+			if (should_check_rate(td)) {
 				td->rate_next_io_time[__ddir] = usec_for_io(td, __ddir);
+				fio_gettime(&comp_time, NULL);
+			}
 
 		} else {
 			ret = io_u_submit(td, io_u);
@@ -1172,8 +1174,11 @@ reap:
 								f->file_name);
 			}
 		}
-	} else
+	} else {
+		if (td->o.io_submit_mode == IO_MODE_OFFLOAD)
+			workqueue_flush(&td->io_wq);
 		cleanup_pending_aio(td);
+	}
 
 	/*
 	 * stop job if we failed doing any IO
diff --git a/ci/actions-full-test.sh b/ci/actions-full-test.sh
index 4ae1dba1..91790664 100755
--- a/ci/actions-full-test.sh
+++ b/ci/actions-full-test.sh
@@ -10,6 +10,7 @@ main() {
     else
         sudo python3 t/run-fio-tests.py --skip 6 1007 1008 --debug
     fi
+    make -C doc html
 }
 
 main
diff --git a/ci/actions-install.sh b/ci/actions-install.sh
index b3486a47..0e472717 100755
--- a/ci/actions-install.sh
+++ b/ci/actions-install.sh
@@ -60,6 +60,7 @@ DPKGCFG
     # care about the architecture.
     pkgs+=(
         python3-scipy
+	python3-sphinx
     )
 
     echo "Updating APT..."
@@ -78,7 +79,7 @@ install_macos() {
     #brew update >/dev/null 2>&1
     echo "Installing packages..."
     HOMEBREW_NO_AUTO_UPDATE=1 brew install cunit
-    pip3 install scipy six
+    pip3 install scipy six sphinx
 }
 
 main() {
diff --git a/doc/Makefile b/doc/Makefile
index 3b979f9a..a444d83a 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -2,7 +2,7 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    =
+SPHINXOPTS    = -W --keep-going
 SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = output
diff --git a/doc/fio_doc.rst b/doc/fio_doc.rst
index 8e1216f0..34e7fde9 100644
--- a/doc/fio_doc.rst
+++ b/doc/fio_doc.rst
@@ -5,7 +5,7 @@ fio - Flexible I/O tester rev. |version|
 .. include:: ../README.rst
 
 
-.. include:: ../HOWTO
+.. include:: ../HOWTO.rst
 
 
 
diff --git a/doc/fio_man.rst b/doc/fio_man.rst
index 44312f16..dc1d1c0d 100644
--- a/doc/fio_man.rst
+++ b/doc/fio_man.rst
@@ -9,4 +9,4 @@ Fio Manpage
 .. include:: ../README.rst
 
 
-.. include:: ../HOWTO
+.. include:: ../HOWTO.rst
diff --git a/fio.1 b/fio.1
index f32d7915..e23d4092 100644
--- a/fio.1
+++ b/fio.1
@@ -2091,6 +2091,19 @@ option when using cpuio I/O engine.
 .BI (cpuio)cpuchunks \fR=\fPint
 Split the load into cycles of the given time. In microseconds.
 .TP
+.BI (cpuio)cpumode \fR=\fPstr
+Specify how to stress the CPU. It can take these two values:
+.RS
+.RS
+.TP
+.B noop
+This is the default and directs the CPU to execute noop instructions.
+.TP
+.B qsort
+Replace the default noop instructions with a qsort algorithm to consume more energy.
+.RE
+.RE
+.TP
 .BI (cpuio)exit_on_io_done \fR=\fPbool
 Detect when I/O threads are done, then exit.
 .TP
diff --git a/os/windows/install.wxs b/os/windows/install.wxs
index 7773bb3b..f2753289 100755
--- a/os/windows/install.wxs
+++ b/os/windows/install.wxs
@@ -33,13 +33,13 @@
 						</Component>
 						<?endif?>
 						<Component>
-							<File Id="README" Name="README.txt" Source="..\..\README"/>
+							<File Id="README" Name="README.txt" Source="..\..\README.rst"/>
 						</Component>
 						<Component>
 							<File Id="REPORTING_BUGS" Name="REPORTING-BUGS.txt" Source="..\..\REPORTING-BUGS"/>
 						</Component>
 						<Component>
-							<File Id="HOWTO" Name="HOWTO.txt" Source="..\..\HOWTO"/>
+							<File Id="HOWTO" Name="HOWTO.txt" Source="..\..\HOWTO.rst"/>
 						</Component>
 						<Component>
 							<File Id="COPYING" Name="COPYING.txt" Source="..\..\COPYING"/>
diff --git a/t/io_uring.c b/t/io_uring.c
index faf5978c..4520de43 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -714,12 +714,15 @@ static int reap_events_aio(struct submitter *s, struct io_event *events, int evs
 static void *submitter_aio_fn(void *data)
 {
 	struct submitter *s = data;
-	int i, ret, prepped, nr_batch;
+	int i, ret, prepped;
 	struct iocb **iocbsptr;
 	struct iocb *iocbs;
 	struct io_event *events;
-
-	nr_batch = submitter_init(s);
+#ifdef ARCH_HAVE_CPU_CLOCK
+	int nr_batch = submitter_init(s);
+#else
+	submitter_init(s);
+#endif
 
 	iocbsptr = calloc(depth, sizeof(struct iocb *));
 	iocbs = calloc(depth, sizeof(struct iocb));

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-09 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-09 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit b65c1fc07d4794920224312c56c785de2f3f1692:

  t/io_uring: fix warnings for !ARCH_HAVE_CPU_CLOCK (2022-02-04 09:02:49 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to df597be63e26ef59c1538b3ce2026c83684ff7fb:

  fio: really use LDFLAGS when linking dynamic engines (2022-02-08 09:28:30 -0700)

----------------------------------------------------------------
Eric Sandeen (1):
      fio: really use LDFLAGS when linking dynamic engines

 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 2432f519..0ab4f82c 100644
--- a/Makefile
+++ b/Makefile
@@ -295,7 +295,7 @@ define engine_template =
 $(1)_OBJS := $$($(1)_SRCS:.c=.o)
 $$($(1)_OBJS): CFLAGS := -fPIC $$($(1)_CFLAGS) $(CFLAGS)
 engines/fio-$(1).so: $$($(1)_OBJS)
-	$$(QUIET_LINK)$(CC) $(DYNAMIC) -shared -rdynamic -fPIC -Wl,-soname,fio-$(1).so.1 -o $$@ $$< $$($(1)_LIBS)
+	$$(QUIET_LINK)$(CC) $(LDFLAGS) -shared -rdynamic -fPIC -Wl,-soname,fio-$(1).so.1 -o $$@ $$< $$($(1)_LIBS)
 ENGS_OBJS += engines/fio-$(1).so
 endef
 else # !CONFIG_DYNAMIC_ENGINES

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-05 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-05 13:00 UTC (permalink / raw)
  To: fio

The following changes since commit 62e9ece4d540ff2af865e4b43811f3150b8b846b:

  fio: use correct function declaration for set_epoch_time() (2022-02-03 16:06:59 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to b65c1fc07d4794920224312c56c785de2f3f1692:

  t/io_uring: fix warnings for !ARCH_HAVE_CPU_CLOCK (2022-02-04 09:02:49 -0700)

----------------------------------------------------------------
Jens Axboe (1):
      t/io_uring: fix warnings for !ARCH_HAVE_CPU_CLOCK

Niklas Cassel (1):
      stat: make free_clat_prio_stats() safe against NULL

 stat.c       |  3 +++
 t/io_uring.c | 11 ++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

---

Diff of recent changes:

diff --git a/stat.c b/stat.c
index 0876222a..1764eebc 100644
--- a/stat.c
+++ b/stat.c
@@ -2041,6 +2041,9 @@ void free_clat_prio_stats(struct thread_stat *ts)
 {
 	enum fio_ddir ddir;
 
+	if (!ts)
+		return;
+
 	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
 		sfree(ts->clat_prio[ddir]);
 		ts->clat_prio[ddir] = NULL;
diff --git a/t/io_uring.c b/t/io_uring.c
index e8365a79..faf5978c 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -287,6 +287,7 @@ out:
 	free(ovals);
 }
 
+#ifdef ARCH_HAVE_CPU_CLOCK
 static unsigned int plat_val_to_idx(unsigned long val)
 {
 	unsigned int msb, error_bits, base, offset, idx;
@@ -322,6 +323,7 @@ static unsigned int plat_val_to_idx(unsigned long val)
 
 	return idx;
 }
+#endif
 
 static void add_stat(struct submitter *s, int clock_index, int nr)
 {
@@ -789,9 +791,12 @@ static void *submitter_uring_fn(void *data)
 {
 	struct submitter *s = data;
 	struct io_sq_ring *ring = &s->sq_ring;
-	int ret, prepped, nr_batch;
-
-	nr_batch = submitter_init(s);
+	int ret, prepped;
+#ifdef ARCH_HAVE_CPU_CLOCK
+	int nr_batch = submitter_init(s);
+#else
+	submitter_init(s);
+#endif
 
 	prepped = 0;
 	do {

^ permalink raw reply related	[flat|nested] 1180+ messages in thread

* Recent changes (master)
@ 2022-02-04 13:00 Jens Axboe
  0 siblings, 0 replies; 1180+ messages in thread
From: Jens Axboe @ 2022-02-04 13:00 UTC (permalink / raw)
  To: fio

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 132470 bytes --]

The following changes since commit 52a0b9ed71c3e929461e64b39059281948107071:

  Merge branch 'patch-1' of https://github.com/Nikratio/fio (2022-01-28 14:50:51 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 62e9ece4d540ff2af865e4b43811f3150b8b846b:

  fio: use correct function declaration for set_epoch_time() (2022-02-03 16:06:59 -0700)

----------------------------------------------------------------
David Korczynski (1):
      ci/Github actions: add CIFuzz integration

Jens Axboe (6):
      Merge branch 'master' of https://github.com/blah325/fio
      server: fix formatting issue
      Merge branch 'freebsd-comment-update' of https://github.com/macdice/fio
      Merge branch 'cifuzz-integration' of https://github.com/DavidKorczynski/fio
      Merge branch 'fio_pr_alternate_epoch' of https://github.com/PCPartPicker/fio
      fio: use correct function declaration for set_epoch_time()

Niklas Cassel (18):
      init: verify option lat_percentiles consistency for all jobs in group
      backend: do ioprio_set() before calling the ioengine init callback
      stat: save the default ioprio in struct thread_stat
      client/server: convert ss_data to use an offset instead of fixed position
      stat: add a new function to allocate a clat_prio_stat array
      os: define min/max prio class and level for systems without ioprio
      options: add a parsing function for an additional cmdprio_bssplit format
      cmdprio: add support for a new cmdprio_bssplit entry format
      examples: add new cmdprio_bssplit format examples
      stat: use enum fio_ddir consistently
      stat: increment members counter after call to sum_thread_stats()
      stat: add helper for resetting the latency buckets
      stat: disable per prio stats where not needed
      stat: report clat stats on a per priority granularity
      stat: convert json output to a new per priority granularity format
      gfio: drop support for high/low priority latency results
      stat: remove unused high/low prio struct members
      t/latency_percentiles.py: add tests for the new cmdprio_bssplit format

Thomas Munro (1):
      Update comments about availability of fdatasync().

aggieNick02 (1):
      Support for alternate epochs in fio log files

james rizzo (3):
      Avoid client calls to recv() without prior poll()
      Add Windows support for --server.
      Added a new windows only IO engine option “no_completion_thread”.

 .github/workflows/cifuzz.yml |  24 ++
 HOWTO                        |  41 +++-
 backend.c                    |  27 ++-
 cconv.c                      |   4 +
 client.c                     |  48 ++--
 engines/cmdprio.c            | 440 +++++++++++++++++++++++++++++------
 engines/cmdprio.h            |  22 +-
 engines/filecreate.c         |   2 +-
 engines/filedelete.c         |   2 +-
 engines/filestat.c           |   2 +-
 engines/windowsaio.c         | 134 +++++++++--
 examples/cmdprio-bssplit.fio |  39 +++-
 fio.1                        |  45 +++-
 fio.h                        |   2 +-
 fio_time.h                   |   2 +-
 gclient.c                    |  55 +----
 init.c                       |  37 +++
 io_u.c                       |   7 +-
 io_u.h                       |   3 +-
 libfio.c                     |   2 +-
 optgroup.h                   |   2 +
 options.c                    | 140 ++++++++++++
 os/os-windows.h              |   2 +
 os/os.h                      |   4 +
 os/windows/posix.c           | 182 ++++++++++++++-
 rate-submit.c                |  11 +-
 server.c                     | 369 +++++++++++++++++++++++++++---
 server.h                     |   7 +-
 stat.c                       | 531 ++++++++++++++++++++++++++++++++++---------
 stat.h                       |  40 +++-
 t/latency_percentiles.py     | 211 ++++++++++-------
 thread_options.h             |  14 ++
 time.c                       |  12 +-
 33 files changed, 2019 insertions(+), 444 deletions(-)
 create mode 100644 .github/workflows/cifuzz.yml

---

Diff of recent changes:

diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
new file mode 100644
index 00000000..acc8d482
--- /dev/null
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,24 @@
+name: CIFuzz
+on: [pull_request]
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Build Fuzzers
+      id: build
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'fio'
+        dry-run: false
+    - name: Run Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'fio'
+        fuzz-seconds: 600
+        dry-run: false
+    - name: Upload Crash
+      uses: actions/upload-artifact@v1
+      if: failure() && steps.build.outcome == 'success'
+      with:
+        name: artifacts
+        path: ./out/artifacts
diff --git a/HOWTO b/HOWTO
index c72ec8cd..74ba7216 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1344,7 +1344,7 @@ I/O type
 .. option:: fdatasync=int
 
 	Like :option:`fsync` but uses :manpage:`fdatasync(2)` to only sync data and
-	not metadata blocks. In Windows, FreeBSD, DragonFlyBSD or OSX there is no
+	not metadata blocks. In Windows, DragonFlyBSD or OSX there is no
 	:manpage:`fdatasync(2)` so this falls back to using :manpage:`fsync(2)`.
 	Defaults to 0, which means fio does not periodically issue and wait for a
 	data-only sync to complete.
@@ -2212,10 +2212,28 @@ with the caveat that when used on the command line, they must come after the
 	depending on the block size of the IO. This option is useful only
 	when used together with the :option:`bssplit` option, that is,
 	multiple different block sizes are used for reads and writes.
-	The format for this option is the same as the format of the
-	:option:`bssplit` option, with the exception that values for
-	trim IOs are ignored. This option is mutually exclusive with the
-	:option:`cmdprio_percentage` option.
+
+	The first accepted format for this option is the same as the format of
+	the :option:`bssplit` option:
+
+		cmdprio_bssplit=blocksize/percentage:blocksize/percentage
+
+	In this case, each entry will use the priority class and priority
+	level defined by the options :option:`cmdprio_class` and
+	:option:`cmdprio` respectively.
+
+	The second accepted format for this option is:
+
+		cmdprio_bssplit=blocksize/percentage/class/level:blocksize/percentage/class/level
+
+	In this case, the priority class and priority level are defined inside
+	each entry. In comparison with the first accepted format, the second
+	accepted format does not restrict all entries to have the same priority
+	class and priority level.
+
+	For both formats, only the read and write data directions are supported,
+	values for trim IOs are ignored. This option is mutually exclusive with
+	the :option:`cmdprio_percentage` option.
 
 .. option:: fixedbufs : [io_uring]
 
@@ -3663,6 +3681,19 @@ Measurements and reporting
 	write_type_log for each log type, instead of the default zero-based
 	timestamps.
 
+.. option:: log_alternate_epoch=bool
+
+	If set, fio will log timestamps based on the epoch used by the clock specified
+	in the log_alternate_epoch_clock_id option, to the log files produced by
+	enabling write_type_log for each log type, instead of the default zero-based
+	timestamps.
+
+.. option:: log_alternate_epoch_clock_id=int
+
+	Specifies the clock_id to be used by clock_gettime to obtain the alternate epoch
+	if either log_unix_epoch or log_alternate_epoch are true. Otherwise has no
+	effect. Default value is 0, or CLOCK_REALTIME.
+
 .. option:: block_error_percentiles=bool
 
 	If set, record errors in trim block-sized units from writes and trims and
diff --git a/backend.c b/backend.c
index c167f908..061e3b32 100644
--- a/backend.c
+++ b/backend.c
@@ -1777,6 +1777,18 @@ static void *thread_main(void *data)
 	if (!init_iolog(td))
 		goto err;
 
+	/* ioprio_set() has to be done before td_io_init() */
+	if (fio_option_is_set(o, ioprio) ||
+	    fio_option_is_set(o, ioprio_class)) {
+		ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
+		if (ret == -1) {
+			td_verror(td, errno, "ioprio_set");
+			goto err;
+		}
+		td->ioprio = ioprio_value(o->ioprio_class, o->ioprio);
+		td->ts.ioprio = td->ioprio;
+	}
+
 	if (td_io_init(td))
 		goto err;
 
@@ -1789,16 +1801,6 @@ static void *thread_main(void *data)
 	if (o->verify_async && verify_async_init(td))
 		goto err;
 
-	if (fio_option_is_set(o, ioprio) ||
-	    fio_option_is_set(o, ioprio_class)) {
-		ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
-		if (ret == -1) {
-			td_verror(td, errno, "ioprio_set");
-			goto err;
-		}
-		td->ioprio = ioprio_value(o->ioprio_class, o->ioprio);
-	}
-
 	if (o->cgroup && cgroup_setup(td, cgroup_list, &cgroup_mnt))
 		goto err;
 
@@ -1828,7 +1830,7 @@ static void *thread_main(void *data)
 	if (rate_submit_init(td, sk_out))
 		goto err;
 
-	set_epoch_time(td, o->log_unix_epoch);
+	set_epoch_time(td, o->log_unix_epoch | o->log_alternate_epoch, o->log_alternate_epoch_clock_id);
 	fio_getrusage(&td->ru_start);
 	memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch));
 	memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch));
@@ -2611,6 +2613,9 @@ int fio_backend(struct sk_out *sk_out)
 	}
 
 	for_each_td(td, i) {
+		struct thread_stat *ts = &td->ts;
+
+		free_clat_prio_stats(ts);
 		steadystate_free(td);
 		fio_options_free(td);
 		fio_dump_options_free(td);
diff --git a/cconv.c b/cconv.c
index 4f8d27eb..62d02e36 100644
--- a/cconv.c
+++ b/cconv.c
@@ -197,6 +197,8 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	o->log_gz = le32_to_cpu(top->log_gz);
 	o->log_gz_store = le32_to_cpu(top->log_gz_store);
 	o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch);
+	o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
+	o->log_alternate_epoch_clock_id = le32_to_cpu(top->log_alternate_epoch_clock_id);
 	o->norandommap = le32_to_cpu(top->norandommap);
 	o->softrandommap = le32_to_cpu(top->softrandommap);
 	o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
@@ -425,6 +427,8 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 	top->log_gz = cpu_to_le32(o->log_gz);
 	top->log_gz_store = cpu_to_le32(o->log_gz_store);
 	top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch);
+	top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
+	top->log_alternate_epoch_clock_id = cpu_to_le32(o->log_alternate_epoch_clock_id);
 	top->norandommap = cpu_to_le32(o->norandommap);
 	top->softrandommap = cpu_to_le32(o->softrandommap);
 	top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
diff --git a/client.c b/client.c
index be8411d8..605a3ce5 100644
--- a/client.c
+++ b/client.c
@@ -284,9 +284,10 @@ static int fio_client_dec_jobs_eta(struct client_eta *eta, client_eta_op eta_fn)
 static void fio_drain_client_text(struct fio_client *client)
 {
 	do {
-		struct fio_net_cmd *cmd;
+		struct fio_net_cmd *cmd = NULL;
 
-		cmd = fio_net_recv_cmd(client->fd, false);
+		if (fio_server_poll_fd(client->fd, POLLIN, 0))
+			cmd = fio_net_recv_cmd(client->fd, false);
 		if (!cmd)
 			break;
 
@@ -953,6 +954,8 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
 	dst->pid		= le32_to_cpu(src->pid);
 	dst->members		= le32_to_cpu(src->members);
 	dst->unified_rw_rep	= le32_to_cpu(src->unified_rw_rep);
+	dst->ioprio		= le32_to_cpu(src->ioprio);
+	dst->disable_prio_stat	= le32_to_cpu(src->disable_prio_stat);
 
 	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 		convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]);
@@ -1035,14 +1038,6 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
 	dst->nr_block_infos	= le64_to_cpu(src->nr_block_infos);
 	for (i = 0; i < dst->nr_block_infos; i++)
 		dst->block_infos[i] = le32_to_cpu(src->block_infos[i]);
-	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
-		for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
-			dst->io_u_plat_high_prio[i][j] = le64_to_cpu(src->io_u_plat_high_prio[i][j]);
-			dst->io_u_plat_low_prio[i][j] = le64_to_cpu(src->io_u_plat_low_prio[i][j]);
-		}
-		convert_io_stat(&dst->clat_high_prio_stat[i], &src->clat_high_prio_stat[i]);
-		convert_io_stat(&dst->clat_low_prio_stat[i], &src->clat_low_prio_stat[i]);
-	}
 
 	dst->ss_dur		= le64_to_cpu(src->ss_dur);
 	dst->ss_state		= le32_to_cpu(src->ss_state);
@@ -1052,6 +1047,19 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
 	dst->ss_deviation.u.f 	= fio_uint64_to_double(le64_to_cpu(src->ss_deviation.u.i));
 	dst->ss_criterion.u.f 	= fio_uint64_to_double(le64_to_cpu(src->ss_criterion.u.i));
 
+	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+		dst->nr_clat_prio[i] = le32_to_cpu(src->nr_clat_prio[i]);
+		for (j = 0; j < dst->nr_clat_prio[i]; j++) {
+			for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
+				dst->clat_prio[i][j].io_u_plat[k] =
+					le64_to_cpu(src->clat_prio[i][j].io_u_plat[k]);
+			convert_io_stat(&dst->clat_prio[i][j].clat_stat,
+					&src->clat_prio[i][j].clat_stat);
+			dst->clat_prio[i][j].ioprio =
+				le32_to_cpu(dst->clat_prio[i][j].ioprio);
+		}
+	}
+
 	if (dst->ss_state & FIO_SS_DATA) {
 		for (i = 0; i < dst->ss_dur; i++ ) {
 			dst->ss_iops_data[i] = le64_to_cpu(src->ss_iops_data[i]);
@@ -1760,7 +1768,6 @@ int fio_handle_client(struct fio_client *client)
 {
 	struct client_ops *ops = client->ops;
 	struct fio_net_cmd *cmd;
-	int size;
 
 	dprint(FD_NET, "client: handle %s\n", client->hostname);
 
@@ -1794,14 +1801,26 @@ int fio_handle_client(struct fio_client *client)
 		}
 	case FIO_NET_CMD_TS: {
 		struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
+		uint64_t offset;
+		int i;
+
+		for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+			if (le32_to_cpu(p->ts.nr_clat_prio[i])) {
+				offset = le64_to_cpu(p->ts.clat_prio_offset[i]);
+				p->ts.clat_prio[i] =
+					(struct clat_prio_stat *)((char *)p + offset);
+			}
+		}
 
 		dprint(FD_NET, "client: ts->ss_state = %u\n", (unsigned int) le32_to_cpu(p->ts.ss_state));
 		if (le32_to_cpu(p->ts.ss_state) & FIO_SS_DATA) {
 			dprint(FD_NET, "client: received steadystate ring buffers\n");
 
-			size = le64_to_cpu(p->ts.ss_dur);
-			p->ts.ss_iops_data = (uint64_t *) ((struct cmd_ts_pdu *)cmd->payload + 1);
-			p->ts.ss_bw_data = p->ts.ss_iops_data + size;
+			offset = le64_to_cpu(p->ts.ss_iops_data_offset);
+			p->ts.ss_iops_data = (uint64_t *)((char *)p + offset);
+
+			offset = le64_to_cpu(p->ts.ss_bw_data_offset);
+			p->ts.ss_bw_data = (uint64_t *)((char *)p + offset);
 		}
 
 		convert_ts(&p->ts, &p->ts);
@@ -2152,6 +2171,7 @@ int fio_handle_clients(struct client_ops *ops)
 
 	fio_client_json_fini();
 
+	free_clat_prio_stats(&client_ts);
 	free(pfds);
 	return retval || error_clients;
 }
diff --git a/engines/cmdprio.c b/engines/cmdprio.c
index 92b752ae..dd358754 100644
--- a/engines/cmdprio.c
+++ b/engines/cmdprio.c
@@ -5,45 +5,201 @@
 
 #include "cmdprio.h"
 
-static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg,
-				    enum fio_ddir ddir, char *str, bool data)
+/*
+ * Temporary array used during parsing. Will be freed after the corresponding
+ * struct bsprio_desc has been generated and saved in cmdprio->bsprio_desc.
+ */
+struct cmdprio_parse_result {
+	struct split_prio *entries;
+	int nr_entries;
+};
+
+/*
+ * Temporary array used during init. Will be freed after the corresponding
+ * struct clat_prio_stat array has been saved in td->ts.clat_prio and the
+ * matching clat_prio_indexes have been saved in each struct cmdprio_prio.
+ */
+struct cmdprio_values {
+	unsigned int *prios;
+	int nr_prios;
+};
+
+static int find_clat_prio_index(unsigned int *all_prios, int nr_prios,
+				int32_t prio)
 {
-	struct cmdprio *cmdprio = cb_arg;
-	struct split split;
-	unsigned int i;
+	int i;
 
-	if (ddir == DDIR_TRIM)
-		return 0;
+	for (i = 0; i < nr_prios; i++) {
+		if (all_prios[i] == prio)
+			return i;
+	}
 
-	memset(&split, 0, sizeof(split));
+	return -1;
+}
 
-	if (split_parse_ddir(to, &split, str, data, BSSPLIT_MAX))
+/**
+ * assign_clat_prio_index - In order to spare stat.c the need to loop through
+ * all possible priorities each time add_clat_sample() / add_lat_sample() is
+ * called, save which index to use in each cmdprio_prio. This will later be
+ * propagated to the io_u, if the specific io_u was determined to use a cmdprio
+ * priority value.
+ */
+static void assign_clat_prio_index(struct cmdprio_prio *prio,
+				   struct cmdprio_values *values)
+{
+	int clat_prio_index = find_clat_prio_index(values->prios,
+						   values->nr_prios,
+						   prio->prio);
+	if (clat_prio_index == -1) {
+		clat_prio_index = values->nr_prios;
+		values->prios[clat_prio_index] = prio->prio;
+		values->nr_prios++;
+	}
+	prio->clat_prio_index = clat_prio_index;
+}
+
+/**
+ * init_cmdprio_values - Allocate a temporary array that can hold all unique
+ * priorities (per ddir), so that we can assign_clat_prio_index() for each
+ * cmdprio_prio during setup. This temporary array is freed after setup.
+ */
+static int init_cmdprio_values(struct cmdprio_values *values,
+			       int max_unique_prios, struct thread_stat *ts)
+{
+	values->prios = calloc(max_unique_prios + 1,
+			       sizeof(*values->prios));
+	if (!values->prios)
 		return 1;
-	if (!split.nr)
-		return 0;
 
-	cmdprio->bssplit_nr[ddir] = split.nr;
-	cmdprio->bssplit[ddir] = malloc(split.nr * sizeof(struct bssplit));
-	if (!cmdprio->bssplit[ddir])
+	/* td->ioprio/ts->ioprio is always stored at index 0. */
+	values->prios[0] = ts->ioprio;
+	values->nr_prios++;
+
+	return 0;
+}
+
+/**
+ * init_ts_clat_prio - Allocates and fills a clat_prio_stat array which holds
+ * all unique priorities (per ddir).
+ */
+static int init_ts_clat_prio(struct thread_stat *ts, enum fio_ddir ddir,
+			     struct cmdprio_values *values)
+{
+	int i;
+
+	if (alloc_clat_prio_stat_ddir(ts, ddir, values->nr_prios))
 		return 1;
 
-	for (i = 0; i < split.nr; i++) {
-		cmdprio->bssplit[ddir][i].bs = split.val1[i];
-		if (split.val2[i] == -1U) {
-			cmdprio->bssplit[ddir][i].perc = 0;
-		} else {
-			if (split.val2[i] > 100)
-				cmdprio->bssplit[ddir][i].perc = 100;
-			else
-				cmdprio->bssplit[ddir][i].perc = split.val2[i];
+	for (i = 0; i < values->nr_prios; i++)
+		ts->clat_prio[ddir][i].ioprio = values->prios[i];
+
+	return 0;
+}
+
+static int fio_cmdprio_fill_bsprio(struct cmdprio_bsprio *bsprio,
+				   struct split_prio *entries,
+				   struct cmdprio_values *values,
+				   int implicit_cmdprio, int start, int end)
+{
+	struct cmdprio_prio *prio;
+	int i = end - start + 1;
+
+	bsprio->prios = calloc(i, sizeof(*bsprio->prios));
+	if (!bsprio->prios)
+		return 1;
+
+	bsprio->bs = entries[start].bs;
+	bsprio->nr_prios = 0;
+	for (i = start; i <= end; i++) {
+		prio = &bsprio->prios[bsprio->nr_prios];
+		prio->perc = entries[i].perc;
+		if (entries[i].prio == -1)
+			prio->prio = implicit_cmdprio;
+		else
+			prio->prio = entries[i].prio;
+		assign_clat_prio_index(prio, values);
+		bsprio->tot_perc += entries[i].perc;
+		if (bsprio->tot_perc > 100) {
+			log_err("fio: cmdprio_bssplit total percentage "
+				"for bs: %"PRIu64" exceeds 100\n",
+				bsprio->bs);
+			free(bsprio->prios);
+			return 1;
 		}
+		bsprio->nr_prios++;
+	}
+
+	return 0;
+}
+
+static int
+fio_cmdprio_generate_bsprio_desc(struct cmdprio_bsprio_desc *bsprio_desc,
+				 struct cmdprio_parse_result *parse_res,
+				 struct cmdprio_values *values,
+				 int implicit_cmdprio)
+{
+	struct split_prio *entries = parse_res->entries;
+	int nr_entries = parse_res->nr_entries;
+	struct cmdprio_bsprio *bsprio;
+	int i, start, count = 0;
+
+	/*
+	 * The parsed result is sorted by blocksize, so count only the number
+	 * of different blocksizes, to know how many cmdprio_bsprio we need.
+	 */
+	for (i = 0; i < nr_entries; i++) {
+		while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs)
+			i++;
+		count++;
+	}
+
+	/*
+	 * This allocation is not freed on error. Instead, the calling function
+	 * is responsible for calling fio_cmdprio_cleanup() on error.
+	 */
+	bsprio_desc->bsprios = calloc(count, sizeof(*bsprio_desc->bsprios));
+	if (!bsprio_desc->bsprios)
+		return 1;
+
+	start = 0;
+	bsprio_desc->nr_bsprios = 0;
+	for (i = 0; i < nr_entries; i++) {
+		while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs)
+			i++;
+		bsprio = &bsprio_desc->bsprios[bsprio_desc->nr_bsprios];
+		/*
+		 * All parsed entries with the same blocksize get saved in the
+		 * same cmdprio_bsprio, to expedite the search in the hot path.
+		 */
+		if (fio_cmdprio_fill_bsprio(bsprio, entries, values,
+					    implicit_cmdprio, start, i))
+			return 1;
+
+		start = i + 1;
+		bsprio_desc->nr_bsprios++;
 	}
 
 	return 0;
 }
 
-int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input,
-			      struct cmdprio *cmdprio)
+static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg,
+				    enum fio_ddir ddir, char *str, bool data)
+{
+	struct cmdprio_parse_result *parse_res_arr = cb_arg;
+	struct cmdprio_parse_result *parse_res = &parse_res_arr[ddir];
+
+	if (ddir == DDIR_TRIM)
+		return 0;
+
+	if (split_parse_prio_ddir(to, &parse_res->entries,
+				  &parse_res->nr_entries, str))
+		return 1;
+
+	return 0;
+}
+
+static int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input,
+				     struct cmdprio_parse_result *parse_res)
 {
 	char *str, *p;
 	int ret = 0;
@@ -53,26 +209,39 @@ int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input,
 	strip_blank_front(&str);
 	strip_blank_end(str);
 
-	ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, cmdprio,
+	ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, parse_res,
 			      false);
 
 	free(p);
 	return ret;
 }
 
-static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u)
+/**
+ * fio_cmdprio_percentage - Returns the percentage of I/Os that should
+ * use a cmdprio priority value (rather than the default context priority).
+ *
+ * For CMDPRIO_MODE_BSSPLIT, if the percentage is non-zero, we will also
+ * return the matching bsprio, to avoid the same linear search elsewhere.
+ * For CMDPRIO_MODE_PERC, we will never return a bsprio.
+ */
+static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u,
+				  struct cmdprio_bsprio **bsprio)
 {
+	struct cmdprio_bsprio *bsprio_entry;
 	enum fio_ddir ddir = io_u->ddir;
-	struct cmdprio_options *options = cmdprio->options;
 	int i;
 
 	switch (cmdprio->mode) {
 	case CMDPRIO_MODE_PERC:
-		return options->percentage[ddir];
+		*bsprio = NULL;
+		return cmdprio->perc_entry[ddir].perc;
 	case CMDPRIO_MODE_BSSPLIT:
-		for (i = 0; i < cmdprio->bssplit_nr[ddir]; i++) {
-			if (cmdprio->bssplit[ddir][i].bs == io_u->buflen)
-				return cmdprio->bssplit[ddir][i].perc;
+		for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) {
+			bsprio_entry = &cmdprio->bsprio_desc[ddir].bsprios[i];
+			if (bsprio_entry->bs == io_u->buflen) {
+				*bsprio = bsprio_entry;
+				return bsprio_entry->tot_perc;
+			}
 		}
 		break;
 	default:
@@ -83,6 +252,11 @@ static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u)
 		assert(0);
 	}
 
+	/*
+	 * This is totally fine, the given blocksize simply does not
+	 * have any (non-zero) cmdprio_bssplit entries defined.
+	 */
+	*bsprio = NULL;
 	return 0;
 }
 
@@ -100,52 +274,162 @@ static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u)
 bool fio_cmdprio_set_ioprio(struct thread_data *td, struct cmdprio *cmdprio,
 			    struct io_u *io_u)
 {
-	enum fio_ddir ddir = io_u->ddir;
-	struct cmdprio_options *options = cmdprio->options;
-	unsigned int p;
-	unsigned int cmdprio_value =
-		ioprio_value(options->class[ddir], options->level[ddir]);
-
-	p = fio_cmdprio_percentage(cmdprio, io_u);
-	if (p && rand_between(&td->prio_state, 0, 99) < p) {
-		io_u->ioprio = cmdprio_value;
-		if (!td->ioprio || cmdprio_value < td->ioprio) {
-			/*
-			 * The async IO priority is higher (has a lower value)
-			 * than the default priority (which is either 0 or the
-			 * value set by "prio" and "prioclass" options).
-			 */
-			io_u->flags |= IO_U_F_HIGH_PRIO;
-		}
+	struct cmdprio_bsprio *bsprio;
+	unsigned int p, rand;
+	uint32_t perc = 0;
+	int i;
+
+	p = fio_cmdprio_percentage(cmdprio, io_u, &bsprio);
+	if (!p)
+		return false;
+
+	rand = rand_between(&td->prio_state, 0, 99);
+	if (rand >= p)
+		return false;
+
+	switch (cmdprio->mode) {
+	case CMDPRIO_MODE_PERC:
+		io_u->ioprio = cmdprio->perc_entry[io_u->ddir].prio;
+		io_u->clat_prio_index =
+			cmdprio->perc_entry[io_u->ddir].clat_prio_index;
 		return true;
+	case CMDPRIO_MODE_BSSPLIT:
+		assert(bsprio);
+		for (i = 0; i < bsprio->nr_prios; i++) {
+			struct cmdprio_prio *prio = &bsprio->prios[i];
+
+			perc += prio->perc;
+			if (rand < perc) {
+				io_u->ioprio = prio->prio;
+				io_u->clat_prio_index = prio->clat_prio_index;
+				return true;
+			}
+		}
+		break;
+	default:
+		assert(0);
 	}
 
-	if (td->ioprio && td->ioprio < cmdprio_value) {
+	/* When rand < p (total perc), we should always find a cmdprio_prio. */
+	assert(0);
+	return false;
+}
+
+static int fio_cmdprio_gen_perc(struct thread_data *td, struct cmdprio *cmdprio)
+{
+	struct cmdprio_options *options = cmdprio->options;
+	struct cmdprio_prio *prio;
+	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+	struct thread_stat *ts = &td->ts;
+	enum fio_ddir ddir;
+	int ret;
+
+	for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
 		/*
-		 * The IO will be executed with the default priority (which is
-		 * either 0 or the value set by "prio" and "prioclass options),
-		 * and this priority is higher (has a lower value) than the
-		 * async IO priority.
+		 * Do not allocate a clat_prio array nor set the cmdprio struct
+		 * if zero percent of the I/Os (for the ddir) should use a
+		 * cmdprio priority value, or when the ddir is not enabled.
 		 */
-		io_u->flags |= IO_U_F_HIGH_PRIO;
+		if (!options->percentage[ddir] ||
+		    (ddir == DDIR_READ && !td_read(td)) ||
+		    (ddir == DDIR_WRITE && !td_write(td)))
+			continue;
+
+		ret = init_cmdprio_values(&values[ddir], 1, ts);
+		if (ret)
+			goto err;
+
+		prio = &cmdprio->perc_entry[ddir];
+		prio->perc = options->percentage[ddir];
+		prio->prio = ioprio_value(options->class[ddir],
+					  options->level[ddir]);
+		assign_clat_prio_index(prio, &values[ddir]);
+
+		ret = init_ts_clat_prio(ts, ddir, &values[ddir]);
+		if (ret)
+			goto err;
+
+		free(values[ddir].prios);
+		values[ddir].prios = NULL;
+		values[ddir].nr_prios = 0;
 	}
 
-	return false;
+	return 0;
+
+err:
+	for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++)
+		free(values[ddir].prios);
+	free_clat_prio_stats(ts);
+
+	return ret;
 }
 
 static int fio_cmdprio_parse_and_gen_bssplit(struct thread_data *td,
 					     struct cmdprio *cmdprio)
 {
 	struct cmdprio_options *options = cmdprio->options;
-	int ret;
-
-	ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str, cmdprio);
+	struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {0};
+	struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+	struct thread_stat *ts = &td->ts;
+	int ret, implicit_cmdprio;
+	enum fio_ddir ddir;
+
+	ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str,
+					&parse_res[0]);
 	if (ret)
 		goto err;
 
+	for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
+		/*
+		 * Do not allocate a clat_prio array nor set the cmdprio structs
+		 * if there are no non-zero entries (for the ddir), or when the
+		 * ddir is not enabled.
+		 */
+		if (!parse_res[ddir].nr_entries ||
+		    (ddir == DDIR_READ && !td_read(td)) ||
+		    (ddir == DDIR_WRITE && !td_write(td))) {
+			free(parse_res[ddir].entries);
+			parse_res[ddir].entries = NULL;
+			parse_res[ddir].nr_entries = 0;
+			continue;
+		}
+
+		ret = init_cmdprio_values(&values[ddir],
+					  parse_res[ddir].nr_entries, ts);
+		if (ret)
+			goto err;
+
+		implicit_cmdprio = ioprio_value(options->class[ddir],
+						options->level[ddir]);
+
+		ret = fio_cmdprio_generate_bsprio_desc(&cmdprio->bsprio_desc[ddir],
+						       &parse_res[ddir],
+						       &values[ddir],
+						       implicit_cmdprio);
+		if (ret)
+			goto err;
+
+		free(parse_res[ddir].entries);
+		parse_res[ddir].entries = NULL;
+		parse_res[ddir].nr_entries = 0;
+
+		ret = init_ts_clat_prio(ts, ddir, &values[ddir]);
+		if (ret)
+			goto err;
+
+		free(values[ddir].prios);
+		values[ddir].prios = NULL;
+		values[ddir].nr_prios = 0;
+	}
+
 	return 0;
 
 err:
+	for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
+		free(parse_res[ddir].entries);
+		free(values[ddir].prios);
+	}
+	free_clat_prio_stats(ts);
 	fio_cmdprio_cleanup(cmdprio);
 
 	return ret;
@@ -157,40 +441,46 @@ static int fio_cmdprio_parse_and_gen(struct thread_data *td,
 	struct cmdprio_options *options = cmdprio->options;
 	int i, ret;
 
+	/*
+	 * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class
+	 * is not set, default to RT priority class.
+	 */
+	for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) {
+		/*
+		 * A cmdprio value is only used when fio_cmdprio_percentage()
+		 * returns non-zero, so it is safe to set a class even for a
+		 * DDIR that will never use it.
+		 */
+		if (!options->class[i])
+			options->class[i] = IOPRIO_CLASS_RT;
+	}
+
 	switch (cmdprio->mode) {
 	case CMDPRIO_MODE_BSSPLIT:
 		ret = fio_cmdprio_parse_and_gen_bssplit(td, cmdprio);
 		break;
 	case CMDPRIO_MODE_PERC:
-		ret = 0;
+		ret = fio_cmdprio_gen_perc(td, cmdprio);
 		break;
 	default:
 		assert(0);
 		return 1;
 	}
 
-	/*
-	 * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class
-	 * is not set, default to RT priority class.
-	 */
-	for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) {
-		if (options->percentage[i] || cmdprio->bssplit_nr[i]) {
-			if (!options->class[i])
-				options->class[i] = IOPRIO_CLASS_RT;
-		}
-	}
-
 	return ret;
 }
 
 void fio_cmdprio_cleanup(struct cmdprio *cmdprio)
 {
-	int ddir;
+	enum fio_ddir ddir;
+	int i;
 
 	for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
-		free(cmdprio->bssplit[ddir]);
-		cmdprio->bssplit[ddir] = NULL;
-		cmdprio->bssplit_nr[ddir] = 0;
+		for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++)
+			free(cmdprio->bsprio_desc[ddir].bsprios[i].prios);
+		free(cmdprio->bsprio_desc[ddir].bsprios);
+		cmdprio->bsprio_desc[ddir].bsprios = NULL;
+		cmdprio->bsprio_desc[ddir].nr_bsprios = 0;
 	}
 
 	/*
diff --git a/engines/cmdprio.h b/engines/cmdprio.h
index 0c7bd6cf..755da8d0 100644
--- a/engines/cmdprio.h
+++ b/engines/cmdprio.h
@@ -17,6 +17,24 @@ enum {
 	CMDPRIO_MODE_BSSPLIT,
 };
 
+struct cmdprio_prio {
+	int32_t prio;
+	uint32_t perc;
+	uint16_t clat_prio_index;
+};
+
+struct cmdprio_bsprio {
+	uint64_t bs;
+	uint32_t tot_perc;
+	unsigned int nr_prios;
+	struct cmdprio_prio *prios;
+};
+
+struct cmdprio_bsprio_desc {
+	struct cmdprio_bsprio *bsprios;
+	unsigned int nr_bsprios;
+};
+
 struct cmdprio_options {
 	unsigned int percentage[CMDPRIO_RWDIR_CNT];
 	unsigned int class[CMDPRIO_RWDIR_CNT];
@@ -26,8 +44,8 @@ struct cmdprio_options {
 
 struct cmdprio {
 	struct cmdprio_options *options;
-	unsigned int bssplit_nr[CMDPRIO_RWDIR_CNT];
-	struct bssplit *bssplit[CMDPRIO_RWDIR_CNT];
+	struct cmdprio_prio perc_entry[CMDPRIO_RWDIR_CNT];
+	struct cmdprio_bsprio_desc bsprio_desc[CMDPRIO_RWDIR_CNT];
 	unsigned int mode;
 };
 
diff --git a/engines/filecreate.c b/engines/filecreate.c
index 4bb13c34..7884752d 100644
--- a/engines/filecreate.c
+++ b/engines/filecreate.c
@@ -49,7 +49,7 @@ static int open_file(struct thread_data *td, struct fio_file *f)
 		uint64_t nsec;
 
 		nsec = ntime_since_now(&start);
-		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
+		add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
 	}
 
 	return 0;
diff --git a/engines/filedelete.c b/engines/filedelete.c
index e882ccf0..df388ac9 100644
--- a/