All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] zbd: support 'z' suffix for zone granularity
@ 2021-02-04  8:32 Alexey Dobriyan
  2021-02-05  0:53 ` Dmitry Fomichev
  0 siblings, 1 reply; 7+ messages in thread
From: Alexey Dobriyan @ 2021-02-04  8:32 UTC (permalink / raw)
  To: axboe; +Cc: fio, dmitry.fomichev, damien.lemoal

This is nifty for writing quick tests and when firmware guys change
zone sizes.

Converted options are

	size=
	io_size=
	offset=
	offset_increment=

Example:

	rw=write
	numjobs=2
	offset=1z
	offset_increment=10z
	size=5z
	io_size=6z

Thread 1 will write zones 1, 2, 3, 4, 5, 1.
Thread 2 will write zones 11, 12, 13, 14, 15, 11.
	
Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
---

 filesetup.c      |   40 +++++++++++++++++++++++++++++++++++-----
 fio.1            |   11 +++++++----
 options.c        |   30 ++++++++++++++++++++++--------
 parse.c          |   39 ++++++++++++++++++++++++++++++++++++++-
 parse.h          |   12 ++++++++++--
 server.h         |    2 +-
 thread_options.h |   13 +++++++++++--
 zbd.c            |    9 ++++++++-
 zbd.h            |    1 +
 9 files changed, 133 insertions(+), 24 deletions(-)

--- a/filesetup.c
+++ b/filesetup.c
@@ -1029,6 +1029,35 @@ int setup_files(struct thread_data *td)
 	if (o->read_iolog_file)
 		goto done;
 
+	if (td->o.zone_mode == ZONE_MODE_ZBD) {
+		struct fio_file *f;
+		int i;
+
+		err = zbd_init_files(td);
+		if (err)
+			goto err_out;
+
+		for_each_file(td, f, i) {
+			struct zoned_block_device_info *zbd = f->zbd_info;
+
+			if (!zbd)
+				continue;
+
+			if (td->o.size_nz > 0) {
+				td->o.size = td->o.size_nz * zbd->zone_size;
+			}
+			if (td->o.io_size_nz > 0) {
+				td->o.io_size = td->o.io_size_nz * zbd->zone_size;
+			}
+			if (td->o.start_offset_nz > 0) {
+				td->o.start_offset = td->o.start_offset_nz * zbd->zone_size;
+			}
+			if (td->o.offset_increment_nz > 0) {
+				td->o.offset_increment = td->o.offset_increment_nz * zbd->zone_size;
+			}
+		}
+	}
+
 	/*
 	 * check sizes. if the files/devices do not exist and the size
 	 * isn't passed to fio, abort.
@@ -1269,16 +1298,17 @@ int setup_files(struct thread_data *td)
 	}
 
 done:
-	if (o->create_only)
-		td->done = 1;
-
-	td_restore_runstate(td, old_state);
-
 	if (td->o.zone_mode == ZONE_MODE_ZBD) {
 		err = zbd_setup_files(td);
 		if (err)
 			goto err_out;
 	}
+
+	if (o->create_only)
+		td->done = 1;
+
+	td_restore_runstate(td, old_state);
+
 	return 0;
 
 err_offset:
--- a/fio.1
+++ b/fio.1
@@ -348,6 +348,9 @@ us or usec means microseconds
 .PD
 .RE
 .P
+`z' suffix specifies that the value is measured in zones.
+Absolute value will be calculated after figuring out block device's zone size.
+.P
 If the option accepts an upper and lower range, use a colon ':' or
 minus '\-' to separate such values. See \fIirange\fR parameter type.
 If the lower value specified happens to be larger than the upper value
@@ -1030,7 +1033,7 @@ The values are all relative to each other, and no absolute meaning
 should be associated with them.
 .RE
 .TP
-.BI offset \fR=\fPint
+.BI offset \fR=\fPint[%|z]
 Start I/O at the provided offset in the file, given as either a fixed size in
 bytes or a percentage. If a percentage is given, the generated offset will be
 aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
@@ -1045,7 +1048,7 @@ If set to non-zero value, the byte offset generated by a percentage \fBoffset\fR
 is aligned upwards to this value. Defaults to 0 meaning that a percentage
 offset is aligned to the minimum block size.
 .TP
-.BI offset_increment \fR=\fPint
+.BI offset_increment \fR=\fPint[%|z]
 If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_increment\fR
 * thread_number', where the thread number is a counter that starts at 0 and
 is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
@@ -1567,7 +1570,7 @@ Pin the specified amount of memory with \fBmlock\fR\|(2). Can be used to
 simulate a smaller amount of memory. The amount specified is per worker.
 .SS "I/O size"
 .TP
-.BI size \fR=\fPint
+.BI size \fR=\fPint[%|z]
 The total size of file I/O for each thread of this job. Fio will run until
 this many bytes has been transferred, unless runtime is limited by other options
 (such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
@@ -1582,7 +1585,7 @@ given, fio will use 20% of the full size of the given files or devices.
 Can be combined with \fBoffset\fR to constrain the start and end range
 that I/O will be done within.
 .TP
-.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
+.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
 Normally fio operates within the region set by \fBsize\fR, which means
 that the \fBsize\fR option sets both the region and size of I/O to be
 performed. Sometimes that is not what you want. With this option, it is
--- a/options.c
+++ b/options.c
@@ -1471,8 +1471,13 @@ static int str_offset_cb(void *data, unsigned long long *__val)
 	if (parse_is_percent(v)) {
 		td->o.start_offset = 0;
 		td->o.start_offset_percent = -1ULL - v;
+		td->o.start_offset_nz = 0;
 		dprint(FD_PARSE, "SET start_offset_percent %d\n",
 					td->o.start_offset_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.start_offset = 0;
+		td->o.start_offset_percent = 0;
+		td->o.start_offset_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.start_offset = v;
 
@@ -1487,8 +1492,13 @@ static int str_offset_increment_cb(void *data, unsigned long long *__val)
 	if (parse_is_percent(v)) {
 		td->o.offset_increment = 0;
 		td->o.offset_increment_percent = -1ULL - v;
+		td->o.offset_increment_nz = 0;
 		dprint(FD_PARSE, "SET offset_increment_percent %d\n",
 					td->o.offset_increment_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.offset_increment = 0;
+		td->o.offset_increment_percent = 0;
+		td->o.offset_increment_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.offset_increment = v;
 
@@ -1505,6 +1515,10 @@ static int str_size_cb(void *data, unsigned long long *__val)
 		td->o.size_percent = -1ULL - v;
 		dprint(FD_PARSE, "SET size_percent %d\n",
 					td->o.size_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.size = 0;
+		td->o.size_percent = 0;
+		td->o.size_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.size = v;
 
@@ -1525,6 +1539,10 @@ static int str_io_size_cb(void *data, unsigned long long *__val)
 		}
 		dprint(FD_PARSE, "SET io_size_percent %d\n",
 					td->o.io_size_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.io_size = 0;
+		td->o.io_size_percent = 0;
+		td->o.io_size_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.io_size = v;
 
@@ -2081,11 +2099,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 	{
 		.name	= "size",
 		.lname	= "Size",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_size_cb,
 		.off1	= offsetof(struct thread_options, size),
 		.help	= "Total size of device or files",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2093,11 +2110,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.name	= "io_size",
 		.alias	= "io_limit",
 		.lname	= "IO Size",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_io_size_cb,
 		.off1	= offsetof(struct thread_options, io_size),
 		.help	= "Total size of I/O to be performed",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2138,12 +2154,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.name	= "offset",
 		.lname	= "IO offset",
 		.alias	= "fileoffset",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_offset_cb,
 		.off1	= offsetof(struct thread_options, start_offset),
 		.help	= "Start IO from this offset",
 		.def	= "0",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2161,14 +2176,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 	{
 		.name	= "offset_increment",
 		.lname	= "IO offset increment",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_offset_increment_cb,
 		.off1	= offsetof(struct thread_options, offset_increment),
 		.help	= "What is the increment from one offset to the next",
 		.parent = "offset",
 		.hide	= 1,
 		.def	= "0",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
--- a/parse.c
+++ b/parse.c
@@ -37,6 +37,7 @@ static const char *opt_type_names[] = {
 	"OPT_BOOL",
 	"OPT_FLOAT_LIST",
 	"OPT_STR_SET",
+	"OPT_STR_VAL_ZONE",
 	"OPT_DEPRECATED",
 	"OPT_SOFT_DEPRECATED",
 	"OPT_UNSUPPORTED",
@@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 		fallthrough;
 	case FIO_OPT_ULL:
 	case FIO_OPT_INT:
-	case FIO_OPT_STR_VAL: {
+	case FIO_OPT_STR_VAL:
+not_zone_granularity:;
+		{
+
 		fio_opt_str_val_fn *fn = o->cb;
 		char tmp[128], *p;
 
@@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 		}
 		break;
 	}
+	case FIO_OPT_STR_VAL_ZONE: {
+		int (*f)(void*, unsigned long long *) = o->cb;
+		char *ep;
+		unsigned long long val;
+		size_t len = strlen(ptr);
+
+		if (len == 0 || ptr[len - 1] != 'z')
+			goto not_zone_granularity;
+
+		errno = 0;
+		val = strtoul(ptr, &ep, 10);
+		if (errno == 0 && ep != ptr) {
+			switch (*ep) {
+			case 'z':
+				val = ZONE_BASE_VAL + (uint32_t)val;
+				break;
+
+			default:
+				ret = 1;
+				break;
+			}
+			val_store(ullp, val, o->off1, 0, data, o);
+			ret = 0;
+		} else {
+			ret = 1;
+		}
+		if (ret) {
+			return ret;
+		}
+
+		ret = f(data, &val);
+		break;
+	}
 	case FIO_OPT_DEPRECATED:
 		ret = 1;
 		fallthrough;
--- a/parse.h
+++ b/parse.h
@@ -21,6 +21,7 @@ enum fio_opt_type {
 	FIO_OPT_BOOL,
 	FIO_OPT_FLOAT_LIST,
 	FIO_OPT_STR_SET,
+	FIO_OPT_STR_VAL_ZONE,
 	FIO_OPT_DEPRECATED,
 	FIO_OPT_SOFT_DEPRECATED,
 	FIO_OPT_UNSUPPORTED,	/* keep this last */
@@ -128,14 +129,21 @@ static inline void *td_var(void *to, const struct fio_option *o,
 	return (void *) ((uintptr_t) ret + offset);
 }
 
+#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
+
 static inline int parse_is_percent(unsigned long long val)
 {
-	return val <= -1ULL && val >= (-1ULL - 100ULL);
+	return val >= (-1ULL - 100ULL);
 }
 
 static inline int parse_is_percent_uncapped(unsigned long long val)
 {
-	return (long long)val <= -1;
+	return ZONE_BASE_VAL + -1U < val;
+}
+
+static inline int parse_is_zone(unsigned long long val)
+{
+	return (val - ZONE_BASE_VAL) <= -1U;
 }
 
 struct print_option {
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-	FIO_SERVER_VER			= 87,
+	FIO_SERVER_VER			= 88,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
--- a/thread_options.h
+++ b/thread_options.h
@@ -83,13 +83,16 @@ struct thread_options {
 	unsigned long long size;
 	unsigned long long io_size;
 	unsigned int size_percent;
+	unsigned int size_nz;
 	unsigned int io_size_percent;
+	unsigned int io_size_nz;
 	unsigned int fill_device;
 	unsigned int file_append;
 	unsigned long long file_size_low;
 	unsigned long long file_size_high;
 	unsigned long long start_offset;
 	unsigned long long start_offset_align;
+	unsigned int start_offset_nz;
 
 	unsigned long long bs[DDIR_RWDIR_CNT];
 	unsigned long long ba[DDIR_RWDIR_CNT];
@@ -315,6 +318,7 @@ struct thread_options {
 	unsigned int gid;
 
 	unsigned int offset_increment_percent;
+	unsigned int offset_increment_nz;
 	unsigned long long offset_increment;
 	unsigned long long number_ios;
 
@@ -384,14 +388,19 @@ struct thread_options_pack {
 	uint64_t size;
 	uint64_t io_size;
 	uint32_t size_percent;
+	uint32_t size_nz;
 	uint32_t io_size_percent;
+	uint32_t io_size_nz;
 	uint32_t fill_device;
 	uint32_t file_append;
 	uint32_t unique_filename;
+	uint32_t _;
 	uint64_t file_size_low;
 	uint64_t file_size_high;
 	uint64_t start_offset;
 	uint64_t start_offset_align;
+	uint32_t start_offset_nz;
+	uint32_t __;
 
 	uint64_t bs[DDIR_RWDIR_CNT];
 	uint64_t ba[DDIR_RWDIR_CNT];
@@ -464,8 +473,6 @@ struct thread_options_pack {
 	struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
 	uint32_t zone_split_nr[DDIR_RWDIR_CNT];
 
-	uint8_t pad1[4];
-
 	fio_fp64_t zipf_theta;
 	fio_fp64_t pareto_h;
 	fio_fp64_t gauss_dev;
@@ -616,12 +623,14 @@ struct thread_options_pack {
 	uint32_t gid;
 
 	uint32_t offset_increment_percent;
+	uint32_t offset_increment_nz;
 	uint64_t offset_increment;
 	uint64_t number_ios;
 
 	uint64_t latency_target;
 	uint64_t latency_window;
 	uint64_t max_latency;
+	uint32_t ___;
 	fio_fp64_t latency_percentile;
 	uint32_t latency_run;
 
--- a/zbd.c
+++ b/zbd.c
@@ -648,7 +648,7 @@ static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
 static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
 			  struct fio_zone_info *z);
 
-int zbd_setup_files(struct thread_data *td)
+int zbd_init_files(struct thread_data *td)
 {
 	struct fio_file *f;
 	int i;
@@ -657,6 +657,13 @@ int zbd_setup_files(struct thread_data *td)
 		if (zbd_init_zone_info(td, f))
 			return 1;
 	}
+	return 0;
+}
+
+int zbd_setup_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
 
 	if (!zbd_using_direct_io()) {
 		log_err("Using direct I/O is mandatory for writing to ZBD drives\n\n");
--- a/zbd.h
+++ b/zbd.h
@@ -87,6 +87,7 @@ struct zoned_block_device_info {
 	struct fio_zone_info	zone_info[0];
 };
 
+int zbd_init_files(struct thread_data *td);
 int zbd_setup_files(struct thread_data *td);
 void zbd_free_zone_info(struct fio_file *f);
 void zbd_file_reset(struct thread_data *td, struct fio_file *f);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH] zbd: support 'z' suffix for zone granularity
  2021-02-04  8:32 [PATCH] zbd: support 'z' suffix for zone granularity Alexey Dobriyan
@ 2021-02-05  0:53 ` Dmitry Fomichev
  2021-02-08 14:43   ` Alexey Dobriyan
                     ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Dmitry Fomichev @ 2021-02-05  0:53 UTC (permalink / raw)
  To: Alexey Dobriyan, axboe; +Cc: fio, Damien Le Moal

Alexey,

I like the general idea of this patch. Adding zone units looks useful,
especially for ZNS workloads.

One obvious part that is missing in the current version is the unit test.
Please add a simple test (#55?) to t/zbd/test-zbd-support script
exercising the new functionality. Something along the lines of the example
that you put into the commit message should be fine. Adding verify would
be really good.

Adding a couple more tests to run a similar config against non-zoned block
devices and strided zoned block devices would be ideal. The tests should be
checking that fio rejects values with 'z' suffices with non-zoned devices and
accepts them if strided zoned mode is specified, correct?

See more comments inline below...

> -----Original Message-----
> From: Alexey Dobriyan <adobriyan@gmail.com>
> Sent: Thursday, February 4, 2021 3:32 AM
> To: axboe@kernel.dk
> Cc: fio@vger.kernel.org; Dmitry Fomichev <Dmitry.Fomichev@wdc.com>;
> Damien Le Moal <Damien.LeMoal@wdc.com>
> Subject: [PATCH] zbd: support 'z' suffix for zone granularity
> 
> This is nifty for writing quick tests and when firmware guys change
> zone sizes.

Maybe put something more descriptive here? Like

Allow users to express i/o offsets and sizes in zone units when running
fio against zoned block devices (ZBDs).

> 
> Converted options are
> 
> 	size=
> 	io_size=
> 	offset=
> 	offset_increment=
> 
> Example:
> 
> 	rw=write
> 	numjobs=2
> 	offset=1z
> 	offset_increment=10z
> 	size=5z
> 	io_size=6z
> 
> Thread 1 will write zones 1, 2, 3, 4, 5, 1.
> Thread 2 will write zones 11, 12, 13, 14, 15, 11.
> 
> Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
> ---
> 
>  filesetup.c      |   40 +++++++++++++++++++++++++++++++++++-----
>  fio.1            |   11 +++++++----
>  options.c        |   30 ++++++++++++++++++++++--------
>  parse.c          |   39 ++++++++++++++++++++++++++++++++++++++-
>  parse.h          |   12 ++++++++++--
>  server.h         |    2 +-
>  thread_options.h |   13 +++++++++++--
>  zbd.c            |    9 ++++++++-
>  zbd.h            |    1 +
>  9 files changed, 133 insertions(+), 24 deletions(-)
> 
> --- a/filesetup.c
> +++ b/filesetup.c
> @@ -1029,6 +1029,35 @@ int setup_files(struct thread_data *td)
>  	if (o->read_iolog_file)
>  		goto done;
> 
> +	if (td->o.zone_mode == ZONE_MODE_ZBD) {

What about ZONE_MODE_STRIDED? Shouldn't 'z' suffix be enabled for it too?

> +		struct fio_file *f;
> +		int i;
> +
> +		err = zbd_init_files(td);
> +		if (err)
> +			goto err_out;
> +

zbd_init_files() is only called once. Why not fold the loop below into zbd_init_files()
since the code is ZBD-specific?

> +		for_each_file(td, f, i) {
> +			struct zoned_block_device_info *zbd = f->zbd_info;
> +
> +			if (!zbd)
> +				continue;
> +
> +			if (td->o.size_nz > 0) {
> +				td->o.size = td->o.size_nz * zbd->zone_size;
> +			}
> +			if (td->o.io_size_nz > 0) {
> +				td->o.io_size = td->o.io_size_nz * zbd-
> >zone_size;
> +			}
> +			if (td->o.start_offset_nz > 0) {
> +				td->o.start_offset = td->o.start_offset_nz *
> zbd->zone_size;
> +			}
> +			if (td->o.offset_increment_nz > 0) {
> +				td->o.offset_increment = td-
> >o.offset_increment_nz * zbd->zone_size;
> +			}
> +		}
> +	}
> +
>  	/*
>  	 * check sizes. if the files/devices do not exist and the size
>  	 * isn't passed to fio, abort.
> @@ -1269,16 +1298,17 @@ int setup_files(struct thread_data *td)
>  	}
> 
>  done:
> -	if (o->create_only)
> -		td->done = 1;
> -
> -	td_restore_runstate(td, old_state);
> -
>  	if (td->o.zone_mode == ZONE_MODE_ZBD) {
>  		err = zbd_setup_files(td);
>  		if (err)
>  			goto err_out;
>  	}
> +
> +	if (o->create_only)
> +		td->done = 1;
> +
> +	td_restore_runstate(td, old_state);
> +
>  	return 0;
> 
>  err_offset:
> --- a/fio.1
> +++ b/fio.1
> @@ -348,6 +348,9 @@ us or usec means microseconds
>  .PD
>  .RE
>  .P
> +`z' suffix specifies that the value is measured in zones.
> +Absolute value will be calculated after figuring out block device's zone size.

Maybe something like this?

The exact value is calculated automatically when the device zone size becomes known.

> +.P
>  If the option accepts an upper and lower range, use a colon ':' or
>  minus '\-' to separate such values. See \fIirange\fR parameter type.
>  If the lower value specified happens to be larger than the upper value
> @@ -1030,7 +1033,7 @@ The values are all relative to each other, and no
> absolute meaning
>  should be associated with them.
>  .RE
>  .TP
> -.BI offset \fR=\fPint
> +.BI offset \fR=\fPint[%|z]
>  Start I/O at the provided offset in the file, given as either a fixed size in
>  bytes or a percentage. If a percentage is given, the generated offset will be
>  aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR
> if
> @@ -1045,7 +1048,7 @@ If set to non-zero value, the byte offset generated
> by a percentage \fBoffset\fR
>  is aligned upwards to this value. Defaults to 0 meaning that a percentage
>  offset is aligned to the minimum block size.
>  .TP
> -.BI offset_increment \fR=\fPint
> +.BI offset_increment \fR=\fPint[%|z]
>  If this is provided, then the real offset becomes `\fBoffset\fR +
> \fBoffset_increment\fR
>  * thread_number', where the thread number is a counter that starts at 0
> and
>  is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
> @@ -1567,7 +1570,7 @@ Pin the specified amount of memory with
> \fBmlock\fR\|(2). Can be used to
>  simulate a smaller amount of memory. The amount specified is per worker.
>  .SS "I/O size"
>  .TP
> -.BI size \fR=\fPint
> +.BI size \fR=\fPint[%|z]
>  The total size of file I/O for each thread of this job. Fio will run until
>  this many bytes has been transferred, unless runtime is limited by other
> options
>  (such as \fBruntime\fR, for instance, or increased/decreased by
> \fBio_size\fR).
> @@ -1582,7 +1585,7 @@ given, fio will use 20% of the full size of the given
> files or devices.
>  Can be combined with \fBoffset\fR to constrain the start and end range
>  that I/O will be done within.
>  .TP
> -.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
> +.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
>  Normally fio operates within the region set by \fBsize\fR, which means
>  that the \fBsize\fR option sets both the region and size of I/O to be
>  performed. Sometimes that is not what you want. With this option, it is
> --- a/options.c
> +++ b/options.c
> @@ -1471,8 +1471,13 @@ static int str_offset_cb(void *data, unsigned long
> long *__val)
>  	if (parse_is_percent(v)) {
>  		td->o.start_offset = 0;
>  		td->o.start_offset_percent = -1ULL - v;
> +		td->o.start_offset_nz = 0;
>  		dprint(FD_PARSE, "SET start_offset_percent %d\n",
>  					td->o.start_offset_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.start_offset = 0;
> +		td->o.start_offset_percent = 0;
> +		td->o.start_offset_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.start_offset = v;
> 
> @@ -1487,8 +1492,13 @@ static int str_offset_increment_cb(void *data,
> unsigned long long *__val)
>  	if (parse_is_percent(v)) {
>  		td->o.offset_increment = 0;
>  		td->o.offset_increment_percent = -1ULL - v;
> +		td->o.offset_increment_nz = 0;
>  		dprint(FD_PARSE, "SET offset_increment_percent %d\n",
>  					td->o.offset_increment_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.offset_increment = 0;
> +		td->o.offset_increment_percent = 0;
> +		td->o.offset_increment_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.offset_increment = v;
> 
> @@ -1505,6 +1515,10 @@ static int str_size_cb(void *data, unsigned long
> long *__val)
>  		td->o.size_percent = -1ULL - v;
>  		dprint(FD_PARSE, "SET size_percent %d\n",
>  					td->o.size_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.size = 0;
> +		td->o.size_percent = 0;
> +		td->o.size_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.size = v;
> 
> @@ -1525,6 +1539,10 @@ static int str_io_size_cb(void *data, unsigned long
> long *__val)
>  		}
>  		dprint(FD_PARSE, "SET io_size_percent %d\n",
>  					td->o.io_size_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.io_size = 0;
> +		td->o.io_size_percent = 0;
> +		td->o.io_size_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.io_size = v;
> 
> @@ -2081,11 +2099,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  	{
>  		.name	= "size",
>  		.lname	= "Size",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_size_cb,
>  		.off1	= offsetof(struct thread_options, size),
>  		.help	= "Total size of device or files",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2093,11 +2110,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  		.name	= "io_size",
>  		.alias	= "io_limit",
>  		.lname	= "IO Size",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_io_size_cb,
>  		.off1	= offsetof(struct thread_options, io_size),
>  		.help	= "Total size of I/O to be performed",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2138,12 +2154,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  		.name	= "offset",
>  		.lname	= "IO offset",
>  		.alias	= "fileoffset",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_offset_cb,
>  		.off1	= offsetof(struct thread_options, start_offset),
>  		.help	= "Start IO from this offset",
>  		.def	= "0",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2161,14 +2176,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  	{
>  		.name	= "offset_increment",
>  		.lname	= "IO offset increment",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_offset_increment_cb,
>  		.off1	= offsetof(struct thread_options, offset_increment),
>  		.help	= "What is the increment from one offset to the
> next",
>  		.parent = "offset",
>  		.hide	= 1,
>  		.def	= "0",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> --- a/parse.c
> +++ b/parse.c
> @@ -37,6 +37,7 @@ static const char *opt_type_names[] = {
>  	"OPT_BOOL",
>  	"OPT_FLOAT_LIST",
>  	"OPT_STR_SET",
> +	"OPT_STR_VAL_ZONE",
>  	"OPT_DEPRECATED",
>  	"OPT_SOFT_DEPRECATED",
>  	"OPT_UNSUPPORTED",
> @@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option
> *o, const char *ptr,
>  		fallthrough;
>  	case FIO_OPT_ULL:
>  	case FIO_OPT_INT:
> -	case FIO_OPT_STR_VAL: {
> +	case FIO_OPT_STR_VAL:
> +not_zone_granularity:;
> +		{
> +
>  		fio_opt_str_val_fn *fn = o->cb;
>  		char tmp[128], *p;
> 
> @@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option
> *o, const char *ptr,
>  		}
>  		break;
>  	}
> +	case FIO_OPT_STR_VAL_ZONE: {
> +		int (*f)(void*, unsigned long long *) = o->cb;
> +		char *ep;
> +		unsigned long long val;
> +		size_t len = strlen(ptr);
> +
> +		if (len == 0 || ptr[len - 1] != 'z')
> +			goto not_zone_granularity;

The goto above looks pretty bad - the jump is quite far and backwards.
How about arranging the new code a little bit differently in this
function? Something like

	case FIO_OPT_INT:
+ 	if (len > 0 && ptr[len - 1] == 'z') {
+		log_err(<zoned units are not allowed>);
+		return 1;
+	}
+	fallthrough;
+ 	case FIO_OPT_STR_VAL_ZONE:
+ 	if (len > 0 && ptr[len - 1] == 'z') {
+ 		<process the new STR_VAL_ZONE>
+		break;
+	}
+	fallthrough;
+ 	case FIO_OPT_STR_VAL:
+ 		<process the regular STR_VAL>

> +
> +		errno = 0;
> +		val = strtoul(ptr, &ep, 10);
> +		if (errno == 0 && ep != ptr) {
> +			switch (*ep) {
> +			case 'z':
> +				val = ZONE_BASE_VAL + (uint32_t)val;
> +				break;
> +
> +			default:
> +				ret = 1;
> +				break;
> +			}
> +			val_store(ullp, val, o->off1, 0, data, o);
> +			ret = 0;
> +		} else {
> +			ret = 1;
> +		}
> +		if (ret) {
> +			return ret;
> +		}
> +
> +		ret = f(data, &val);
> +		break;
> +	}
>  	case FIO_OPT_DEPRECATED:
>  		ret = 1;
>  		fallthrough;
> --- a/parse.h
> +++ b/parse.h
> @@ -21,6 +21,7 @@ enum fio_opt_type {
>  	FIO_OPT_BOOL,
>  	FIO_OPT_FLOAT_LIST,
>  	FIO_OPT_STR_SET,
> +	FIO_OPT_STR_VAL_ZONE,
>  	FIO_OPT_DEPRECATED,
>  	FIO_OPT_SOFT_DEPRECATED,
>  	FIO_OPT_UNSUPPORTED,	/* keep this last */
> @@ -128,14 +129,21 @@ static inline void *td_var(void *to, const struct
> fio_option *o,
>  	return (void *) ((uintptr_t) ret + offset);
>  }
> 
> +#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
> +
>  static inline int parse_is_percent(unsigned long long val)
>  {
> -	return val <= -1ULL && val >= (-1ULL - 100ULL);
> +	return val >= (-1ULL - 100ULL);
>  }
> 
>  static inline int parse_is_percent_uncapped(unsigned long long val)
>  {
> -	return (long long)val <= -1;
> +	return ZONE_BASE_VAL + -1U < val;
> +}
> +
> +static inline int parse_is_zone(unsigned long long val)
> +{
> +	return (val - ZONE_BASE_VAL) <= -1U;
>  }
> 
>  struct print_option {
> --- a/server.h
> +++ b/server.h
> @@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
>  };
> 
>  enum {
> -	FIO_SERVER_VER			= 87,
> +	FIO_SERVER_VER			= 88,
> 
>  	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
>  	FIO_SERVER_MAX_CMD_MB		= 2048,
> --- a/thread_options.h
> +++ b/thread_options.h
> @@ -83,13 +83,16 @@ struct thread_options {
>  	unsigned long long size;
>  	unsigned long long io_size;
>  	unsigned int size_percent;
> +	unsigned int size_nz;
>  	unsigned int io_size_percent;
> +	unsigned int io_size_nz;
>  	unsigned int fill_device;
>  	unsigned int file_append;
>  	unsigned long long file_size_low;
>  	unsigned long long file_size_high;
>  	unsigned long long start_offset;
>  	unsigned long long start_offset_align;
> +	unsigned int start_offset_nz;
> 
>  	unsigned long long bs[DDIR_RWDIR_CNT];
>  	unsigned long long ba[DDIR_RWDIR_CNT];
> @@ -315,6 +318,7 @@ struct thread_options {
>  	unsigned int gid;
> 
>  	unsigned int offset_increment_percent;
> +	unsigned int offset_increment_nz;
>  	unsigned long long offset_increment;
>  	unsigned long long number_ios;
> 
> @@ -384,14 +388,19 @@ struct thread_options_pack {
>  	uint64_t size;
>  	uint64_t io_size;
>  	uint32_t size_percent;
> +	uint32_t size_nz;
>  	uint32_t io_size_percent;
> +	uint32_t io_size_nz;
>  	uint32_t fill_device;
>  	uint32_t file_append;
>  	uint32_t unique_filename;
> +	uint32_t _;

It seems that the addition of the padding above is unrelated
to this patch. This could be made a separate patch though
along with some explanation. Also, I think it is best to follow
the kernel style for naming of multiple pad members in
a struct -
uint32_t pad1;
....
uint32_t pad2;
, etc.

>  	uint64_t file_size_low;
>  	uint64_t file_size_high;
>  	uint64_t start_offset;
>  	uint64_t start_offset_align;
> +	uint32_t start_offset_nz;
> +	uint32_t __;

uint32_t pad2;

> 
>  	uint64_t bs[DDIR_RWDIR_CNT];
>  	uint64_t ba[DDIR_RWDIR_CNT];
> @@ -464,8 +473,6 @@ struct thread_options_pack {
>  	struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
>  	uint32_t zone_split_nr[DDIR_RWDIR_CNT];
> 
> -	uint8_t pad1[4];

This change doesn't look related to the patch either.

> -
>  	fio_fp64_t zipf_theta;
>  	fio_fp64_t pareto_h;
>  	fio_fp64_t gauss_dev;
> @@ -616,12 +623,14 @@ struct thread_options_pack {
>  	uint32_t gid;
> 
>  	uint32_t offset_increment_percent;
> +	uint32_t offset_increment_nz;
>  	uint64_t offset_increment;
>  	uint64_t number_ios;
> 
>  	uint64_t latency_target;
>  	uint64_t latency_window;
>  	uint64_t max_latency;
> +	uint32_t ___;

Why is this needed?

Best regards,
Dmitry

>  	fio_fp64_t latency_percentile;
>  	uint32_t latency_run;
> 
> --- a/zbd.c
> +++ b/zbd.c
> @@ -648,7 +648,7 @@ static bool zbd_open_zone(struct thread_data *td,
> const struct fio_file *f,
>  static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
>  			  struct fio_zone_info *z);
> 
> -int zbd_setup_files(struct thread_data *td)
> +int zbd_init_files(struct thread_data *td)
>  {
>  	struct fio_file *f;
>  	int i;
> @@ -657,6 +657,13 @@ int zbd_setup_files(struct thread_data *td)
>  		if (zbd_init_zone_info(td, f))
>  			return 1;
>  	}
> +	return 0;
> +}
> +
> +int zbd_setup_files(struct thread_data *td)
> +{
> +	struct fio_file *f;
> +	int i;
> 
>  	if (!zbd_using_direct_io()) {
>  		log_err("Using direct I/O is mandatory for writing to ZBD
> drives\n\n");
> --- a/zbd.h
> +++ b/zbd.h
> @@ -87,6 +87,7 @@ struct zoned_block_device_info {
>  	struct fio_zone_info	zone_info[0];
>  };
> 
> +int zbd_init_files(struct thread_data *td);
>  int zbd_setup_files(struct thread_data *td);
>  void zbd_free_zone_info(struct fio_file *f);
>  void zbd_file_reset(struct thread_data *td, struct fio_file *f);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] zbd: support 'z' suffix for zone granularity
  2021-02-05  0:53 ` Dmitry Fomichev
@ 2021-02-08 14:43   ` Alexey Dobriyan
  2021-02-08 15:53     ` Alexey Dobriyan
  2021-02-08 16:37   ` Alexey Dobriyan
  2021-02-08 17:54   ` [PATCH v2] " Alexey Dobriyan
  2 siblings, 1 reply; 7+ messages in thread
From: Alexey Dobriyan @ 2021-02-08 14:43 UTC (permalink / raw)
  To: Dmitry Fomichev; +Cc: axboe, fio, Damien Le Moal

On Fri, Feb 05, 2021 at 12:53:32AM +0000, Dmitry Fomichev wrote:
> I like the general idea of this patch. Adding zone units looks useful,
> especially for ZNS workloads.

OK -- tests, wording.

> > --- a/filesetup.c
> > +++ b/filesetup.c
> > @@ -1029,6 +1029,35 @@ int setup_files(struct thread_data *td)
> >  	if (o->read_iolog_file)
> >  		goto done;
> > 
> > +	if (td->o.zone_mode == ZONE_MODE_ZBD) {
> 
> What about ZONE_MODE_STRIDED? Shouldn't 'z' suffix be enabled for it too?

I converted zoneskip= as well, but the code asserts somewhere else.
I didn't figure why exactly. zoneskip= and strided mode can be enabled
separatedly.

> > +		struct fio_file *f;
> > +		int i;
> > +
> > +		err = zbd_init_files(td);
> > +		if (err)
> > +			goto err_out;
> > +
> 
> zbd_init_files() is only called once. Why not fold the loop below into zbd_init_files()
> since the code is ZBD-specific?

OK.

> > +		for_each_file(td, f, i) {
> > +			struct zoned_block_device_info *zbd = f->zbd_info;
> > +
> > +			if (!zbd)
> > +				continue;
> > +
> > +			if (td->o.size_nz > 0) {
> > +				td->o.size = td->o.size_nz * zbd->zone_size;
> > +			}
> > +			if (td->o.io_size_nz > 0) {
> > +				td->o.io_size = td->o.io_size_nz * zbd-
> > >zone_size;
> > +			}
> > +			if (td->o.start_offset_nz > 0) {
> > +				td->o.start_offset = td->o.start_offset_nz *
> > zbd->zone_size;
> > +			}
> > +			if (td->o.offset_increment_nz > 0) {
> > +				td->o.offset_increment = td-
> > >o.offset_increment_nz * zbd->zone_size;
> > +			}
> > +		}
> > +	}

> > @@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option
> > *o, const char *ptr,
> >  		fallthrough;
> >  	case FIO_OPT_ULL:
> >  	case FIO_OPT_INT:
> > -	case FIO_OPT_STR_VAL: {
> > +	case FIO_OPT_STR_VAL:
> > +not_zone_granularity:;
> > +		{
> > +
> >  		fio_opt_str_val_fn *fn = o->cb;
> >  		char tmp[128], *p;
> > 
> > @@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option
> > *o, const char *ptr,
> >  		}
> >  		break;
> >  	}
> > +	case FIO_OPT_STR_VAL_ZONE: {
> > +		int (*f)(void*, unsigned long long *) = o->cb;
> > +		char *ep;
> > +		unsigned long long val;
> > +		size_t len = strlen(ptr);
> > +
> > +		if (len == 0 || ptr[len - 1] != 'z')
> > +			goto not_zone_granularity;
> 
> The goto above looks pretty bad - the jump is quite far and backwards.
> How about arranging the new code a little bit differently in this
> function? Something like
> 
> 	case FIO_OPT_INT:
> + 	if (len > 0 && ptr[len - 1] == 'z') {
> +		log_err(<zoned units are not allowed>);
> +		return 1;
> +	}
> +	fallthrough;
> + 	case FIO_OPT_STR_VAL_ZONE:
> + 	if (len > 0 && ptr[len - 1] == 'z') {
> + 		<process the new STR_VAL_ZONE>
> +		break;
> +	}
> +	fallthrough;
> + 	case FIO_OPT_STR_VAL:
> + 		<process the regular STR_VAL>
> 
> > +
> > +		errno = 0;
> > +		val = strtoul(ptr, &ep, 10);
> > +		if (errno == 0 && ep != ptr) {
> > +			switch (*ep) {
> > +			case 'z':
> > +				val = ZONE_BASE_VAL + (uint32_t)val;
> > +				break;
> > +
> > +			default:
> > +				ret = 1;
> > +				break;
> > +			}
> > +			val_store(ullp, val, o->off1, 0, data, o);
> > +			ret = 0;
> > +		} else {
> > +			ret = 1;
> > +		}
> > +		if (ret) {
> > +			return ret;
> > +		}
> > +
> > +		ret = f(data, &val);
> > +		break;
> > +	}
> >  	case FIO_OPT_DEPRECATED:
> >  		ret = 1;
> >  		fallthrough;

> > --- a/thread_options.h
> > +++ b/thread_options.h
> > @@ -83,13 +83,16 @@ struct thread_options {
> >  	unsigned long long size;
> >  	unsigned long long io_size;
> >  	unsigned int size_percent;
> > +	unsigned int size_nz;
> >  	unsigned int io_size_percent;
> > +	unsigned int io_size_nz;
> >  	unsigned int fill_device;
> >  	unsigned int file_append;
> >  	unsigned long long file_size_low;
> >  	unsigned long long file_size_high;
> >  	unsigned long long start_offset;
> >  	unsigned long long start_offset_align;
> > +	unsigned int start_offset_nz;
> > 
> >  	unsigned long long bs[DDIR_RWDIR_CNT];
> >  	unsigned long long ba[DDIR_RWDIR_CNT];
> > @@ -315,6 +318,7 @@ struct thread_options {
> >  	unsigned int gid;
> > 
> >  	unsigned int offset_increment_percent;
> > +	unsigned int offset_increment_nz;
> >  	unsigned long long offset_increment;
> >  	unsigned long long number_ios;
> > 
> > @@ -384,14 +388,19 @@ struct thread_options_pack {
> >  	uint64_t size;
> >  	uint64_t io_size;
> >  	uint32_t size_percent;
> > +	uint32_t size_nz;
> >  	uint32_t io_size_percent;
> > +	uint32_t io_size_nz;
> >  	uint32_t fill_device;
> >  	uint32_t file_append;
> >  	uint32_t unique_filename;
> > +	uint32_t _;
> 
> It seems that the addition of the padding above is unrelated
> to this patch. This could be made a separate patch though
> along with some explanation. Also, I think it is best to follow
> the kernel style for naming of multiple pad members in
> a struct -
> uint32_t pad1;
> ....
> uint32_t pad2;
> , etc.
> 
> >  	uint64_t file_size_low;
> >  	uint64_t file_size_high;
> >  	uint64_t start_offset;
> >  	uint64_t start_offset_align;
> > +	uint32_t start_offset_nz;
> > +	uint32_t __;
> 
> uint32_t pad2;
> 
> > 
> >  	uint64_t bs[DDIR_RWDIR_CNT];
> >  	uint64_t ba[DDIR_RWDIR_CNT];
> > @@ -464,8 +473,6 @@ struct thread_options_pack {
> >  	struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
> >  	uint32_t zone_split_nr[DDIR_RWDIR_CNT];
> > 
> > -	uint8_t pad1[4];
> 
> This change doesn't look related to the patch either.
> 
> > -
> >  	fio_fp64_t zipf_theta;
> >  	fio_fp64_t pareto_h;
> >  	fio_fp64_t gauss_dev;
> > @@ -616,12 +623,14 @@ struct thread_options_pack {
> >  	uint32_t gid;
> > 
> >  	uint32_t offset_increment_percent;
> > +	uint32_t offset_increment_nz;
> >  	uint64_t offset_increment;
> >  	uint64_t number_ios;
> > 
> >  	uint64_t latency_target;
> >  	uint64_t latency_window;
> >  	uint64_t max_latency;
> > +	uint32_t ___;
> 
> Why is this needed?

There are compile failure minefield in libfio.c.
Padding needs to be redone any time new options are added.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] zbd: support 'z' suffix for zone granularity
  2021-02-08 14:43   ` Alexey Dobriyan
@ 2021-02-08 15:53     ` Alexey Dobriyan
  0 siblings, 0 replies; 7+ messages in thread
From: Alexey Dobriyan @ 2021-02-08 15:53 UTC (permalink / raw)
  To: Dmitry Fomichev; +Cc: axboe, fio, Damien Le Moal

On Mon, Feb 08, 2021 at 05:43:20PM +0300, Alexey Dobriyan wrote:
> On Fri, Feb 05, 2021 at 12:53:32AM +0000, Dmitry Fomichev wrote:
> > I like the general idea of this patch. Adding zone units looks useful,
> > especially for ZNS workloads.
> 
> OK -- tests, wording.
> 
> > > --- a/filesetup.c
> > > +++ b/filesetup.c
> > > @@ -1029,6 +1029,35 @@ int setup_files(struct thread_data *td)
> > >  	if (o->read_iolog_file)
> > >  		goto done;
> > > 
> > > +	if (td->o.zone_mode == ZONE_MODE_ZBD) {
> > 
> > What about ZONE_MODE_STRIDED? Shouldn't 'z' suffix be enabled for it too?
> 
> I converted zoneskip= as well, but the code asserts somewhere else.
> I didn't figure why exactly. zoneskip= and strided mode can be enabled
> separatedly.
> 
> > > +		struct fio_file *f;
> > > +		int i;
> > > +
> > > +		err = zbd_init_files(td);
> > > +		if (err)
> > > +			goto err_out;
> > > +
> > 
> > zbd_init_files() is only called once. Why not fold the loop below into zbd_init_files()
> > since the code is ZBD-specific?
> 
> OK.

I take that back!

The reason for a split is that size= and offset= and io_size= must be
available before those checks:

	/*
	 * now file sizes are known, so we can set ->io_size. if size= is
	 * not given, ->io_size is just equal to ->real_file_size. if size
	 * is given, ->io_size is size / nr_files.
	 */

so the init is done in 3 parts:

	optionally create/alloc f->zbd_info

	recalculate if size=/io_size=/offset=/... is supplied in zones

	...

	calculate ->min_zone/->max_zone, etc

but size recalculation is necessary for zonemode=strided as well.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] zbd: support 'z' suffix for zone granularity
  2021-02-05  0:53 ` Dmitry Fomichev
  2021-02-08 14:43   ` Alexey Dobriyan
@ 2021-02-08 16:37   ` Alexey Dobriyan
  2021-02-08 17:54   ` [PATCH v2] " Alexey Dobriyan
  2 siblings, 0 replies; 7+ messages in thread
From: Alexey Dobriyan @ 2021-02-08 16:37 UTC (permalink / raw)
  To: Dmitry Fomichev; +Cc: axboe, fio, Damien Le Moal

On Fri, Feb 05, 2021 at 12:53:32AM +0000, Dmitry Fomichev wrote:
> > @@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option
> > *o, const char *ptr,
> >  		fallthrough;
> >  	case FIO_OPT_ULL:
> >  	case FIO_OPT_INT:
> > -	case FIO_OPT_STR_VAL: {
> > +	case FIO_OPT_STR_VAL:
> > +not_zone_granularity:;
> > +		{
> > +
> >  		fio_opt_str_val_fn *fn = o->cb;
> >  		char tmp[128], *p;
> > 
> > @@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option
> > *o, const char *ptr,
> >  		}
> >  		break;
> >  	}
> > +	case FIO_OPT_STR_VAL_ZONE: {
> > +		int (*f)(void*, unsigned long long *) = o->cb;
> > +		char *ep;
> > +		unsigned long long val;
> > +		size_t len = strlen(ptr);
> > +
> > +		if (len == 0 || ptr[len - 1] != 'z')
> > +			goto not_zone_granularity;
> 
> The goto above looks pretty bad - the jump is quite far and backwards.
> How about arranging the new code a little bit differently in this
> function? Something like
> 
> 	case FIO_OPT_INT:
> + 	if (len > 0 && ptr[len - 1] == 'z') {
> +		log_err(<zoned units are not allowed>);
> +		return 1;
> +	}
> +	fallthrough;
> + 	case FIO_OPT_STR_VAL_ZONE:
> + 	if (len > 0 && ptr[len - 1] == 'z') {
> + 		<process the new STR_VAL_ZONE>
> +		break;
> +	}
> +	fallthrough;
> + 	case FIO_OPT_STR_VAL:
> + 		<process the regular STR_VAL>

This will make FIO_OPT_STR_VAL_TIME options to acquire support
for 'z' suffix. goto has to be somewhere.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] zbd: support 'z' suffix for zone granularity
  2021-02-05  0:53 ` Dmitry Fomichev
  2021-02-08 14:43   ` Alexey Dobriyan
  2021-02-08 16:37   ` Alexey Dobriyan
@ 2021-02-08 17:54   ` Alexey Dobriyan
  2021-02-15  3:40     ` Dmitry Fomichev
  2 siblings, 1 reply; 7+ messages in thread
From: Alexey Dobriyan @ 2021-02-08 17:54 UTC (permalink / raw)
  To: Dmitry Fomichev; +Cc: axboe, fio, Damien Le Moal

Allow users to pass some options with zone granularity which is natural
for ZBD workloads.

This is nifty for writing quick tests and when firmware guys change
zone sizes.

Converted options are

	offset=
	offset_increment=
	size=
	io_size=
	zoneskip=

Example:

	rw=write
	numjobs=2
	offset=1z
	offset_increment=10z
	size=5z
	io_size=6z

Thread 1 will write zones 1, 2, 3, 4, 5, 1.
Thread 2 will write zones 11, 12, 13, 14, 15, 11.

Note #1:
	zonemode=strided doesn't create ZBD zone structure but requires
	value recalculation. This is why 2 functions are split.

Note #2:
	there is a bug with zonemode=strided + size= + zoneskip=
	writing more I/O than necessary.
	'z' suffix doesn't fix nor adds this bug.

Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
---

	fix zoneskip=
	write 2 tests

	note: test only test option parsing,
	I/O pattern testing is long standing issue.

	patterns verified with strace(1)

 filesetup.c            |   18 +++++++++++++-----
 fio.1                  |   13 ++++++++-----
 options.c              |   48 ++++++++++++++++++++++++++++++++++++++----------
 parse.c                |   39 ++++++++++++++++++++++++++++++++++++++-
 parse.h                |    9 ++++++++-
 server.h               |    2 +-
 t/zbd/test-zbd-support |   42 ++++++++++++++++++++++++++++++++++++++++++
 thread_options.h       |   17 +++++++++++++----
 zbd.c                  |   40 +++++++++++++++++++++++++++++++++++++++-
 zbd.h                  |    2 ++
 10 files changed, 202 insertions(+), 28 deletions(-)

--- a/filesetup.c
+++ b/filesetup.c
@@ -1029,6 +1029,13 @@ int setup_files(struct thread_data *td)
 	if (o->read_iolog_file)
 		goto done;
 
+	if (td->o.zone_mode == ZONE_MODE_ZBD) {
+		err = zbd_init_files(td);
+		if (err)
+			goto err_out;
+	}
+	zbd_recalc_options_with_zone_granularity(td);
+
 	/*
 	 * check sizes. if the files/devices do not exist and the size
 	 * isn't passed to fio, abort.
@@ -1269,16 +1276,17 @@ int setup_files(struct thread_data *td)
 	}
 
 done:
-	if (o->create_only)
-		td->done = 1;
-
-	td_restore_runstate(td, old_state);
-
 	if (td->o.zone_mode == ZONE_MODE_ZBD) {
 		err = zbd_setup_files(td);
 		if (err)
 			goto err_out;
 	}
+
+	if (o->create_only)
+		td->done = 1;
+
+	td_restore_runstate(td, old_state);
+
 	return 0;
 
 err_offset:
--- a/fio.1
+++ b/fio.1
@@ -348,6 +348,9 @@ us or usec means microseconds
 .PD
 .RE
 .P
+`z' suffix specifies that the value is measured in zones.
+Value is recalculated once block device's zone size becomes known.
+.P
 If the option accepts an upper and lower range, use a colon ':' or
 minus '\-' to separate such values. See \fIirange\fR parameter type.
 If the lower value specified happens to be larger than the upper value
@@ -780,7 +783,7 @@ If not specified it defaults to the zone size. If the target device is a zoned
 block device, the zone capacity is obtained from the device information and this
 option is ignored.
 .TP
-.BI zoneskip \fR=\fPint
+.BI zoneskip \fR=\fPint[z]
 For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
 bytes of data have been transferred.
 
@@ -1030,7 +1033,7 @@ The values are all relative to each other, and no absolute meaning
 should be associated with them.
 .RE
 .TP
-.BI offset \fR=\fPint
+.BI offset \fR=\fPint[%|z]
 Start I/O at the provided offset in the file, given as either a fixed size in
 bytes or a percentage. If a percentage is given, the generated offset will be
 aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
@@ -1045,7 +1048,7 @@ If set to non-zero value, the byte offset generated by a percentage \fBoffset\fR
 is aligned upwards to this value. Defaults to 0 meaning that a percentage
 offset is aligned to the minimum block size.
 .TP
-.BI offset_increment \fR=\fPint
+.BI offset_increment \fR=\fPint[%|z]
 If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_increment\fR
 * thread_number', where the thread number is a counter that starts at 0 and
 is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
@@ -1567,7 +1570,7 @@ Pin the specified amount of memory with \fBmlock\fR\|(2). Can be used to
 simulate a smaller amount of memory. The amount specified is per worker.
 .SS "I/O size"
 .TP
-.BI size \fR=\fPint
+.BI size \fR=\fPint[%|z]
 The total size of file I/O for each thread of this job. Fio will run until
 this many bytes has been transferred, unless runtime is limited by other options
 (such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
@@ -1582,7 +1585,7 @@ given, fio will use 20% of the full size of the given files or devices.
 Can be combined with \fBoffset\fR to constrain the start and end range
 that I/O will be done within.
 .TP
-.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
+.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
 Normally fio operates within the region set by \fBsize\fR, which means
 that the \fBsize\fR option sets both the region and size of I/O to be
 performed. Sometimes that is not what you want. With this option, it is
--- a/options.c
+++ b/options.c
@@ -1471,8 +1471,13 @@ static int str_offset_cb(void *data, unsigned long long *__val)
 	if (parse_is_percent(v)) {
 		td->o.start_offset = 0;
 		td->o.start_offset_percent = -1ULL - v;
+		td->o.start_offset_nz = 0;
 		dprint(FD_PARSE, "SET start_offset_percent %d\n",
 					td->o.start_offset_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.start_offset = 0;
+		td->o.start_offset_percent = 0;
+		td->o.start_offset_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.start_offset = v;
 
@@ -1487,8 +1492,13 @@ static int str_offset_increment_cb(void *data, unsigned long long *__val)
 	if (parse_is_percent(v)) {
 		td->o.offset_increment = 0;
 		td->o.offset_increment_percent = -1ULL - v;
+		td->o.offset_increment_nz = 0;
 		dprint(FD_PARSE, "SET offset_increment_percent %d\n",
 					td->o.offset_increment_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.offset_increment = 0;
+		td->o.offset_increment_percent = 0;
+		td->o.offset_increment_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.offset_increment = v;
 
@@ -1505,6 +1515,10 @@ static int str_size_cb(void *data, unsigned long long *__val)
 		td->o.size_percent = -1ULL - v;
 		dprint(FD_PARSE, "SET size_percent %d\n",
 					td->o.size_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.size = 0;
+		td->o.size_percent = 0;
+		td->o.size_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.size = v;
 
@@ -1525,12 +1539,30 @@ static int str_io_size_cb(void *data, unsigned long long *__val)
 		}
 		dprint(FD_PARSE, "SET io_size_percent %d\n",
 					td->o.io_size_percent);
+	} else if (parse_is_zone(v)) {
+		td->o.io_size = 0;
+		td->o.io_size_percent = 0;
+		td->o.io_size_nz = v - ZONE_BASE_VAL;
 	} else
 		td->o.io_size = v;
 
 	return 0;
 }
 
+static int str_zoneskip_cb(void *data, unsigned long long *__val)
+{
+	struct thread_data *td = cb_data_to_td(data);
+	unsigned long long v = *__val;
+
+	if (parse_is_zone(v)) {
+		td->o.zone_skip = 0;
+		td->o.zone_skip_nz = v - ZONE_BASE_VAL;
+	} else
+		td->o.zone_skip = v;
+
+	return 0;
+}
+
 static int str_write_bw_log_cb(void *data, const char *str)
 {
 	struct thread_data *td = cb_data_to_td(data);
@@ -2081,11 +2113,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 	{
 		.name	= "size",
 		.lname	= "Size",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_size_cb,
 		.off1	= offsetof(struct thread_options, size),
 		.help	= "Total size of device or files",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2093,11 +2124,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.name	= "io_size",
 		.alias	= "io_limit",
 		.lname	= "IO Size",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_io_size_cb,
 		.off1	= offsetof(struct thread_options, io_size),
 		.help	= "Total size of I/O to be performed",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2138,12 +2168,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.name	= "offset",
 		.lname	= "IO offset",
 		.alias	= "fileoffset",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_offset_cb,
 		.off1	= offsetof(struct thread_options, start_offset),
 		.help	= "Start IO from this offset",
 		.def	= "0",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -2161,14 +2190,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 	{
 		.name	= "offset_increment",
 		.lname	= "IO offset increment",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
 		.cb	= str_offset_increment_cb,
 		.off1	= offsetof(struct thread_options, offset_increment),
 		.help	= "What is the increment from one offset to the next",
 		.parent = "offset",
 		.hide	= 1,
 		.def	= "0",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_INVALID,
 	},
@@ -3404,11 +3432,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 	{
 		.name	= "zoneskip",
 		.lname	= "Zone skip",
-		.type	= FIO_OPT_STR_VAL,
+		.type	= FIO_OPT_STR_VAL_ZONE,
+		.cb	= str_zoneskip_cb,
 		.off1	= offsetof(struct thread_options, zone_skip),
 		.help	= "Space between IO zones",
 		.def	= "0",
-		.interval = 1024 * 1024,
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_ZONE,
 	},
--- a/parse.c
+++ b/parse.c
@@ -37,6 +37,7 @@ static const char *opt_type_names[] = {
 	"OPT_BOOL",
 	"OPT_FLOAT_LIST",
 	"OPT_STR_SET",
+	"OPT_STR_VAL_ZONE",
 	"OPT_DEPRECATED",
 	"OPT_SOFT_DEPRECATED",
 	"OPT_UNSUPPORTED",
@@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 		fallthrough;
 	case FIO_OPT_ULL:
 	case FIO_OPT_INT:
-	case FIO_OPT_STR_VAL: {
+	case FIO_OPT_STR_VAL:
+not_zone_granularity:;
+		{
+
 		fio_opt_str_val_fn *fn = o->cb;
 		char tmp[128], *p;
 
@@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 		}
 		break;
 	}
+	case FIO_OPT_STR_VAL_ZONE: {
+		int (*f)(void*, unsigned long long *) = o->cb;
+		char *ep;
+		unsigned long long val;
+		size_t len = strlen(ptr);
+
+		if (len == 0 || ptr[len - 1] != 'z')
+			goto not_zone_granularity;
+
+		errno = 0;
+		val = strtoul(ptr, &ep, 10);
+		if (errno == 0 && ep != ptr) {
+			switch (*ep) {
+			case 'z':
+				val = ZONE_BASE_VAL + (uint32_t)val;
+				break;
+
+			default:
+				ret = 1;
+				break;
+			}
+			val_store(ullp, val, o->off1, 0, data, o);
+			ret = 0;
+		} else {
+			ret = 1;
+		}
+		if (ret) {
+			return ret;
+		}
+
+		ret = f(data, &val);
+		break;
+	}
 	case FIO_OPT_DEPRECATED:
 		ret = 1;
 		fallthrough;
--- a/parse.h
+++ b/parse.h
@@ -21,6 +21,7 @@ enum fio_opt_type {
 	FIO_OPT_BOOL,
 	FIO_OPT_FLOAT_LIST,
 	FIO_OPT_STR_SET,
+	FIO_OPT_STR_VAL_ZONE,
 	FIO_OPT_DEPRECATED,
 	FIO_OPT_SOFT_DEPRECATED,
 	FIO_OPT_UNSUPPORTED,	/* keep this last */
@@ -133,9 +134,15 @@ static inline int parse_is_percent(unsigned long long val)
 	return val <= -1ULL && val >= (-1ULL - 100ULL);
 }
 
+#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
 static inline int parse_is_percent_uncapped(unsigned long long val)
 {
-	return (long long)val <= -1;
+	return ZONE_BASE_VAL + -1U < val;
+}
+
+static inline int parse_is_zone(unsigned long long val)
+{
+	return (val - ZONE_BASE_VAL) <= -1U;
 }
 
 struct print_option {
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-	FIO_SERVER_VER			= 87,
+	FIO_SERVER_VER			= 88,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -1153,6 +1153,48 @@ test54() {
 		>> "${logfile}.${test_number}" 2>&1 || return $?
 }
 
+# test 'z' suffix parsing only
+test55() {
+	require_zbd || return $SKIP_TESTCASE
+	# offset=1z + offset_increment=10z + size=2z
+	require_seq_zones 13 || return $SKIP_TESTCASE
+
+	run_fio	--name=j		\
+		--filename=${dev}	\
+		--direct=1		\
+		"$(ioengine "psync")"	\
+		--zonemode=zbd		\
+		--zonesize=${zone_size}	\
+		--rw=write		\
+		--bs=${zone_size}	\
+		--numjobs=2		\
+		--offset_increment=10z	\
+		--offset=1z		\
+		--size=2z		\
+		--io_size=3z		\
+		${job_var_opts[@]} --debug=zbd \
+		>> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
+# test 'z' suffix parsing only
+test56() {
+	require_zbd || return $SKIP_TESTCASE
+	require_seq_zones 10 || return $SKIP_TESTCASE
+
+	run_fio	--name=j		\
+		--filename=${dev}	\
+		--direct=1		\
+		"$(ioengine "psync")"	\
+		--zonemode=strided	\
+		--zonesize=${zone_size}	\
+		--rw=write		\
+		--bs=${zone_size}	\
+		--size=10z		\
+		--zoneskip=2z		\
+		${job_var_opts[@]} --debug=zbd \
+		>> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
 SECONDS=0
 tests=()
 dynamic_analyzer=()
--- a/thread_options.h
+++ b/thread_options.h
@@ -83,13 +83,16 @@ struct thread_options {
 	unsigned long long size;
 	unsigned long long io_size;
 	unsigned int size_percent;
+	unsigned int size_nz;
 	unsigned int io_size_percent;
+	unsigned int io_size_nz;
 	unsigned int fill_device;
 	unsigned int file_append;
 	unsigned long long file_size_low;
 	unsigned long long file_size_high;
 	unsigned long long start_offset;
 	unsigned long long start_offset_align;
+	unsigned int start_offset_nz;
 
 	unsigned long long bs[DDIR_RWDIR_CNT];
 	unsigned long long ba[DDIR_RWDIR_CNT];
@@ -198,6 +201,7 @@ struct thread_options {
 	unsigned long long zone_size;
 	unsigned long long zone_capacity;
 	unsigned long long zone_skip;
+	uint32_t zone_skip_nz;
 	enum fio_zone_mode zone_mode;
 	unsigned long long lockmem;
 	enum fio_memtype mem_type;
@@ -315,6 +319,7 @@ struct thread_options {
 	unsigned int gid;
 
 	unsigned int offset_increment_percent;
+	unsigned int offset_increment_nz;
 	unsigned long long offset_increment;
 	unsigned long long number_ios;
 
@@ -384,14 +389,19 @@ struct thread_options_pack {
 	uint64_t size;
 	uint64_t io_size;
 	uint32_t size_percent;
+	uint32_t size_nz;
 	uint32_t io_size_percent;
+	uint32_t io_size_nz;
 	uint32_t fill_device;
 	uint32_t file_append;
 	uint32_t unique_filename;
+	uint32_t pad3;
 	uint64_t file_size_low;
 	uint64_t file_size_high;
 	uint64_t start_offset;
 	uint64_t start_offset_align;
+	uint32_t start_offset_nz;
+	uint32_t pad4;
 
 	uint64_t bs[DDIR_RWDIR_CNT];
 	uint64_t ba[DDIR_RWDIR_CNT];
@@ -464,8 +474,6 @@ struct thread_options_pack {
 	struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
 	uint32_t zone_split_nr[DDIR_RWDIR_CNT];
 
-	uint8_t pad1[4];
-
 	fio_fp64_t zipf_theta;
 	fio_fp64_t pareto_h;
 	fio_fp64_t gauss_dev;
@@ -501,6 +509,7 @@ struct thread_options_pack {
 	uint64_t zone_capacity;
 	uint64_t zone_skip;
 	uint64_t lockmem;
+	uint32_t zone_skip_nz;
 	uint32_t mem_type;
 	uint32_t mem_align;
 
@@ -509,8 +518,6 @@ struct thread_options_pack {
 	uint32_t new_group;
 	uint32_t numjobs;
 
-	uint8_t pad3[4];
-
 	/*
 	 * We currently can't convert these, so don't enable them
 	 */
@@ -616,12 +623,14 @@ struct thread_options_pack {
 	uint32_t gid;
 
 	uint32_t offset_increment_percent;
+	uint32_t offset_increment_nz;
 	uint64_t offset_increment;
 	uint64_t number_ios;
 
 	uint64_t latency_target;
 	uint64_t latency_window;
 	uint64_t max_latency;
+	uint32_t pad5;
 	fio_fp64_t latency_percentile;
 	uint32_t latency_run;
 
--- a/zbd.c
+++ b/zbd.c
@@ -648,7 +648,7 @@ static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
 static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
 			  struct fio_zone_info *z);
 
-int zbd_setup_files(struct thread_data *td)
+int zbd_init_files(struct thread_data *td)
 {
 	struct fio_file *f;
 	int i;
@@ -657,6 +657,44 @@ int zbd_setup_files(struct thread_data *td)
 		if (zbd_init_zone_info(td, f))
 			return 1;
 	}
+	return 0;
+}
+
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
+
+	for_each_file(td, f, i) {
+		struct zoned_block_device_info *zbd = f->zbd_info;
+		// zonemode=strided doesn't get per-file zone size.
+		uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
+
+		if (zone_size == 0)
+			continue;
+
+		if (td->o.size_nz > 0) {
+			td->o.size = td->o.size_nz * zone_size;
+		}
+		if (td->o.io_size_nz > 0) {
+			td->o.io_size = td->o.io_size_nz * zone_size;
+		}
+		if (td->o.start_offset_nz > 0) {
+			td->o.start_offset = td->o.start_offset_nz * zone_size;
+		}
+		if (td->o.offset_increment_nz > 0) {
+			td->o.offset_increment = td->o.offset_increment_nz * zone_size;
+		}
+		if (td->o.zone_skip_nz > 0) {
+			td->o.zone_skip = td->o.zone_skip_nz * zone_size;
+		}
+	}
+}
+
+int zbd_setup_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
 
 	if (!zbd_using_direct_io()) {
 		log_err("Using direct I/O is mandatory for writing to ZBD drives\n\n");
--- a/zbd.h
+++ b/zbd.h
@@ -87,6 +87,8 @@ struct zoned_block_device_info {
 	struct fio_zone_info	zone_info[0];
 };
 
+int zbd_init_files(struct thread_data *td);
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td);
 int zbd_setup_files(struct thread_data *td);
 void zbd_free_zone_info(struct fio_file *f);
 void zbd_file_reset(struct thread_data *td, struct fio_file *f);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] zbd: support 'z' suffix for zone granularity
  2021-02-08 17:54   ` [PATCH v2] " Alexey Dobriyan
@ 2021-02-15  3:40     ` Dmitry Fomichev
  0 siblings, 0 replies; 7+ messages in thread
From: Dmitry Fomichev @ 2021-02-15  3:40 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: axboe, fio, Damien Le Moal

> -----Original Message-----
> From: Alexey Dobriyan <adobriyan@gmail.com>
> Sent: Monday, February 8, 2021 12:55 PM
> To: Dmitry Fomichev <Dmitry.Fomichev@wdc.com>
> Cc: axboe@kernel.dk; fio@vger.kernel.org; Damien Le Moal
> <Damien.LeMoal@wdc.com>
> Subject: [PATCH v2] zbd: support 'z' suffix for zone granularity
> 
> Allow users to pass some options with zone granularity which is natural
> for ZBD workloads.
> 
> This is nifty for writing quick tests and when firmware guys change
> zone sizes.
> 
> Converted options are
> 
> 	offset=
> 	offset_increment=
> 	size=
> 	io_size=
> 	zoneskip=
> 
> Example:
> 
> 	rw=write
> 	numjobs=2
> 	offset=1z
> 	offset_increment=10z
> 	size=5z
> 	io_size=6z
> 
> Thread 1 will write zones 1, 2, 3, 4, 5, 1.
> Thread 2 will write zones 11, 12, 13, 14, 15, 11.
> 
> Note #1:
> 	zonemode=strided doesn't create ZBD zone structure but requires
> 	value recalculation. This is why 2 functions are split.
> 
> Note #2:
> 	there is a bug with zonemode=strided + size= + zoneskip=
> 	writing more I/O than necessary.
> 	'z' suffix doesn't fix nor adds this bug.
> 
> Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
> ---
> 
> 	fix zoneskip=
> 	write 2 tests
> 
> 	note: test only test option parsing,
> 	I/O pattern testing is long standing issue.
> 
> 	patterns verified with strace(1)
> 
>  filesetup.c            |   18 +++++++++++++-----
>  fio.1                  |   13 ++++++++-----
>  options.c              |   48 ++++++++++++++++++++++++++++++++++++++------
> ----
>  parse.c                |   39 ++++++++++++++++++++++++++++++++++++++-
>  parse.h                |    9 ++++++++-
>  server.h               |    2 +-
>  t/zbd/test-zbd-support |   42
> ++++++++++++++++++++++++++++++++++++++++++
>  thread_options.h       |   17 +++++++++++++----
>  zbd.c                  |   40 +++++++++++++++++++++++++++++++++++++++-
>  zbd.h                  |    2 ++
>  10 files changed, 202 insertions(+), 28 deletions(-)
> 
> --- a/filesetup.c
> +++ b/filesetup.c
> @@ -1029,6 +1029,13 @@ int setup_files(struct thread_data *td)
>  	if (o->read_iolog_file)
>  		goto done;
> 
> +	if (td->o.zone_mode == ZONE_MODE_ZBD) {
> +		err = zbd_init_files(td);
> +		if (err)
> +			goto err_out;
> +	}
> +	zbd_recalc_options_with_zone_granularity(td);
> +
>  	/*
>  	 * check sizes. if the files/devices do not exist and the size
>  	 * isn't passed to fio, abort.
> @@ -1269,16 +1276,17 @@ int setup_files(struct thread_data *td)
>  	}
> 
>  done:
> -	if (o->create_only)
> -		td->done = 1;
> -
> -	td_restore_runstate(td, old_state);
> -
>  	if (td->o.zone_mode == ZONE_MODE_ZBD) {
>  		err = zbd_setup_files(td);
>  		if (err)
>  			goto err_out;
>  	}
> +
> +	if (o->create_only)
> +		td->done = 1;
> +
> +	td_restore_runstate(td, old_state);
> +
>  	return 0;
> 
>  err_offset:
> --- a/fio.1
> +++ b/fio.1
> @@ -348,6 +348,9 @@ us or usec means microseconds
>  .PD
>  .RE
>  .P
> +`z' suffix specifies that the value is measured in zones.
> +Value is recalculated once block device's zone size becomes known.
> +.P
>  If the option accepts an upper and lower range, use a colon ':' or
>  minus '\-' to separate such values. See \fIirange\fR parameter type.
>  If the lower value specified happens to be larger than the upper value
> @@ -780,7 +783,7 @@ If not specified it defaults to the zone size. If the
> target device is a zoned
>  block device, the zone capacity is obtained from the device information and
> this
>  option is ignored.
>  .TP
> -.BI zoneskip \fR=\fPint
> +.BI zoneskip \fR=\fPint[z]
>  For \fBzonemode\fR=strided, the number of bytes to skip after
> \fBzonesize\fR
>  bytes of data have been transferred.
> 
> @@ -1030,7 +1033,7 @@ The values are all relative to each other, and no
> absolute meaning
>  should be associated with them.
>  .RE
>  .TP
> -.BI offset \fR=\fPint
> +.BI offset \fR=\fPint[%|z]
>  Start I/O at the provided offset in the file, given as either a fixed size in
>  bytes or a percentage. If a percentage is given, the generated offset will be
>  aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR
> if
> @@ -1045,7 +1048,7 @@ If set to non-zero value, the byte offset generated
> by a percentage \fBoffset\fR
>  is aligned upwards to this value. Defaults to 0 meaning that a percentage
>  offset is aligned to the minimum block size.
>  .TP
> -.BI offset_increment \fR=\fPint
> +.BI offset_increment \fR=\fPint[%|z]
>  If this is provided, then the real offset becomes `\fBoffset\fR +
> \fBoffset_increment\fR
>  * thread_number', where the thread number is a counter that starts at 0
> and
>  is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
> @@ -1567,7 +1570,7 @@ Pin the specified amount of memory with
> \fBmlock\fR\|(2). Can be used to
>  simulate a smaller amount of memory. The amount specified is per worker.
>  .SS "I/O size"
>  .TP
> -.BI size \fR=\fPint
> +.BI size \fR=\fPint[%|z]
>  The total size of file I/O for each thread of this job. Fio will run until
>  this many bytes has been transferred, unless runtime is limited by other
> options
>  (such as \fBruntime\fR, for instance, or increased/decreased by
> \fBio_size\fR).
> @@ -1582,7 +1585,7 @@ given, fio will use 20% of the full size of the given
> files or devices.
>  Can be combined with \fBoffset\fR to constrain the start and end range
>  that I/O will be done within.
>  .TP
> -.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
> +.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
>  Normally fio operates within the region set by \fBsize\fR, which means
>  that the \fBsize\fR option sets both the region and size of I/O to be
>  performed. Sometimes that is not what you want. With this option, it is
> --- a/options.c
> +++ b/options.c
> @@ -1471,8 +1471,13 @@ static int str_offset_cb(void *data, unsigned long
> long *__val)
>  	if (parse_is_percent(v)) {
>  		td->o.start_offset = 0;
>  		td->o.start_offset_percent = -1ULL - v;
> +		td->o.start_offset_nz = 0;
>  		dprint(FD_PARSE, "SET start_offset_percent %d\n",
>  					td->o.start_offset_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.start_offset = 0;
> +		td->o.start_offset_percent = 0;
> +		td->o.start_offset_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.start_offset = v;
> 
> @@ -1487,8 +1492,13 @@ static int str_offset_increment_cb(void *data,
> unsigned long long *__val)
>  	if (parse_is_percent(v)) {
>  		td->o.offset_increment = 0;
>  		td->o.offset_increment_percent = -1ULL - v;
> +		td->o.offset_increment_nz = 0;
>  		dprint(FD_PARSE, "SET offset_increment_percent %d\n",
>  					td->o.offset_increment_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.offset_increment = 0;
> +		td->o.offset_increment_percent = 0;
> +		td->o.offset_increment_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.offset_increment = v;
> 
> @@ -1505,6 +1515,10 @@ static int str_size_cb(void *data, unsigned long
> long *__val)
>  		td->o.size_percent = -1ULL - v;
>  		dprint(FD_PARSE, "SET size_percent %d\n",
>  					td->o.size_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.size = 0;
> +		td->o.size_percent = 0;
> +		td->o.size_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.size = v;
> 
> @@ -1525,12 +1539,30 @@ static int str_io_size_cb(void *data, unsigned
> long long *__val)
>  		}
>  		dprint(FD_PARSE, "SET io_size_percent %d\n",
>  					td->o.io_size_percent);
> +	} else if (parse_is_zone(v)) {
> +		td->o.io_size = 0;
> +		td->o.io_size_percent = 0;
> +		td->o.io_size_nz = v - ZONE_BASE_VAL;
>  	} else
>  		td->o.io_size = v;
> 
>  	return 0;
>  }
> 
> +static int str_zoneskip_cb(void *data, unsigned long long *__val)
> +{
> +	struct thread_data *td = cb_data_to_td(data);
> +	unsigned long long v = *__val;
> +
> +	if (parse_is_zone(v)) {
> +		td->o.zone_skip = 0;
> +		td->o.zone_skip_nz = v - ZONE_BASE_VAL;
> +	} else
> +		td->o.zone_skip = v;
> +
> +	return 0;
> +}
> +
>  static int str_write_bw_log_cb(void *data, const char *str)
>  {
>  	struct thread_data *td = cb_data_to_td(data);
> @@ -2081,11 +2113,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  	{
>  		.name	= "size",
>  		.lname	= "Size",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_size_cb,
>  		.off1	= offsetof(struct thread_options, size),
>  		.help	= "Total size of device or files",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2093,11 +2124,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  		.name	= "io_size",
>  		.alias	= "io_limit",
>  		.lname	= "IO Size",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_io_size_cb,
>  		.off1	= offsetof(struct thread_options, io_size),
>  		.help	= "Total size of I/O to be performed",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2138,12 +2168,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  		.name	= "offset",
>  		.lname	= "IO offset",
>  		.alias	= "fileoffset",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_offset_cb,
>  		.off1	= offsetof(struct thread_options, start_offset),
>  		.help	= "Start IO from this offset",
>  		.def	= "0",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -2161,14 +2190,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  	{
>  		.name	= "offset_increment",
>  		.lname	= "IO offset increment",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
>  		.cb	= str_offset_increment_cb,
>  		.off1	= offsetof(struct thread_options, offset_increment),
>  		.help	= "What is the increment from one offset to the
> next",
>  		.parent = "offset",
>  		.hide	= 1,
>  		.def	= "0",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_INVALID,
>  	},
> @@ -3404,11 +3432,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] =
> {
>  	{
>  		.name	= "zoneskip",
>  		.lname	= "Zone skip",
> -		.type	= FIO_OPT_STR_VAL,
> +		.type	= FIO_OPT_STR_VAL_ZONE,
> +		.cb	= str_zoneskip_cb,
>  		.off1	= offsetof(struct thread_options, zone_skip),
>  		.help	= "Space between IO zones",
>  		.def	= "0",
> -		.interval = 1024 * 1024,
>  		.category = FIO_OPT_C_IO,
>  		.group	= FIO_OPT_G_ZONE,
>  	},
> --- a/parse.c
> +++ b/parse.c
> @@ -37,6 +37,7 @@ static const char *opt_type_names[] = {
>  	"OPT_BOOL",
>  	"OPT_FLOAT_LIST",
>  	"OPT_STR_SET",
> +	"OPT_STR_VAL_ZONE",
>  	"OPT_DEPRECATED",
>  	"OPT_SOFT_DEPRECATED",
>  	"OPT_UNSUPPORTED",
> @@ -599,7 +600,10 @@ static int __handle_option(const struct fio_option
> *o, const char *ptr,
>  		fallthrough;
>  	case FIO_OPT_ULL:
>  	case FIO_OPT_INT:
> -	case FIO_OPT_STR_VAL: {
> +	case FIO_OPT_STR_VAL:
> +not_zone_granularity:;
> +		{
> +
>  		fio_opt_str_val_fn *fn = o->cb;
>  		char tmp[128], *p;
> 
> @@ -944,6 +948,39 @@ static int __handle_option(const struct fio_option
> *o, const char *ptr,
>  		}
>  		break;
>  	}
> +	case FIO_OPT_STR_VAL_ZONE: {
> +		int (*f)(void*, unsigned long long *) = o->cb;
> +		char *ep;
> +		unsigned long long val;
> +		size_t len = strlen(ptr);
> +
> +		if (len == 0 || ptr[len - 1] != 'z')
> +			goto not_zone_granularity;

I still think that the nasty goto can be avoided! What you can do
is something like

 	case FIO_OPT_STR_VAL_TIME:
 		is_time = 1;
 		fallthrough;
 	case FIO_OPT_ULL:
 	case FIO_OPT_INT:
 	case FIO_OPT_STR_VAL:
+	case FIO_OPT_STR_VAL_ZONE:
		{
 		fio_opt_str_val_fn *fn = o->cb;
 		char tmp[128], *p;

+			if (len > 0 && ptr[len - 1] == 'z') {
+				if (o->type != FIO_OPT_STR_VAL_ZONE) {
+ 					log_err(<<z suffix is not applicable>>);
+					return 1;				
+				} else {
+					<<process FIO_OPT_STR_VAL_ZONE>>
+					break;
+				}
+ 			}

> +
> +		errno = 0;
> +		val = strtoul(ptr, &ep, 10);
> +		if (errno == 0 && ep != ptr) {
> +			switch (*ep) {
> +			case 'z':
> +				val = ZONE_BASE_VAL + (uint32_t)val;
> +				break;
> +
> +			default:
> +				ret = 1;
> +				break;
> +			}
> +			val_store(ullp, val, o->off1, 0, data, o);
> +			ret = 0;
> +		} else {
> +			ret = 1;
> +		}
> +		if (ret) {
> +			return ret;
> +		}
> +
> +		ret = f(data, &val);
> +		break;
> +	}
>  	case FIO_OPT_DEPRECATED:
>  		ret = 1;
>  		fallthrough;
> --- a/parse.h
> +++ b/parse.h
> @@ -21,6 +21,7 @@ enum fio_opt_type {
>  	FIO_OPT_BOOL,
>  	FIO_OPT_FLOAT_LIST,
>  	FIO_OPT_STR_SET,
> +	FIO_OPT_STR_VAL_ZONE,
>  	FIO_OPT_DEPRECATED,
>  	FIO_OPT_SOFT_DEPRECATED,
>  	FIO_OPT_UNSUPPORTED,	/* keep this last */
> @@ -133,9 +134,15 @@ static inline int parse_is_percent(unsigned long long
> val)
>  	return val <= -1ULL && val >= (-1ULL - 100ULL);
>  }
> 
> +#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
>  static inline int parse_is_percent_uncapped(unsigned long long val)
>  {
> -	return (long long)val <= -1;
> +	return ZONE_BASE_VAL + -1U < val;
> +}
> +
> +static inline int parse_is_zone(unsigned long long val)
> +{
> +	return (val - ZONE_BASE_VAL) <= -1U;
>  }
> 
>  struct print_option {
> --- a/server.h
> +++ b/server.h
> @@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
>  };
> 
>  enum {
> -	FIO_SERVER_VER			= 87,
> +	FIO_SERVER_VER			= 88,
> 
>  	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
>  	FIO_SERVER_MAX_CMD_MB		= 2048,
> --- a/t/zbd/test-zbd-support
> +++ b/t/zbd/test-zbd-support
> @@ -1153,6 +1153,48 @@ test54() {
>  		>> "${logfile}.${test_number}" 2>&1 || return $?
>  }
> 
> +# test 'z' suffix parsing only
> +test55() {
> +	require_zbd || return $SKIP_TESTCASE
> +	# offset=1z + offset_increment=10z + size=2z
> +	require_seq_zones 13 || return $SKIP_TESTCASE
> +

The both new tests fail in device configurations with zone capacity
smaller than zone size. In my testing, running

t/zbd/run-tests-against-nullb -s 3 -s 9

produces the test failures of this kind. This happens because you
always try to write zones up to their size, not up to capacity.
You can add something like

	off=$((first_sequential_zone_sector * 512))
	zcap=$(total_zone_capacity 1 $off $dev)

here and do --bs=${zcap} below.

> +	run_fio	--name=j		\
> +		--filename=${dev}	\
> +		--direct=1		\
> +		"$(ioengine "psync")"	\
> +		--zonemode=zbd		\
> +		--zonesize=${zone_size}	\
> +		--rw=write		\
> +		--bs=${zone_size}	\
> +		--numjobs=2		\
> +		--offset_increment=10z	\
> +		--offset=1z		\
> +		--size=2z		\
> +		--io_size=3z		\
> +		${job_var_opts[@]} --debug=zbd \
> +		>> "${logfile}.${test_number}" 2>&1 || return $?
> +}
> +
> +# test 'z' suffix parsing only
> +test56() {
> +	require_zbd || return $SKIP_TESTCASE
> +	require_seq_zones 10 || return $SKIP_TESTCASE

This test fails in a few configurations when running
t/zbd/run-tests-against-nullb because writing in strided mode needs
zones to be initially empty. Adding

+ 	reset_zone "${dev}" -1

helps to avoid the failure. And it is probably not going to work if
ZCAP < ZSIZE, so you can perhaps add a new function to the script,
require_full_zcap() or something like that and call it to skip this test
for such configurations.

Regards,
Dmitry

> +
> +	run_fio	--name=j		\
> +		--filename=${dev}	\
> +		--direct=1		\
> +		"$(ioengine "psync")"	\
> +		--zonemode=strided	\
> +		--zonesize=${zone_size}	\
> +		--rw=write		\
> +		--bs=${zone_size}	\
> +		--size=10z		\
> +		--zoneskip=2z		\
> +		${job_var_opts[@]} --debug=zbd \
> +		>> "${logfile}.${test_number}" 2>&1 || return $?
> +}
> +
>  SECONDS=0
>  tests=()
>  dynamic_analyzer=()
> --- a/thread_options.h
> +++ b/thread_options.h
> @@ -83,13 +83,16 @@ struct thread_options {
>  	unsigned long long size;
>  	unsigned long long io_size;
>  	unsigned int size_percent;
> +	unsigned int size_nz;
>  	unsigned int io_size_percent;
> +	unsigned int io_size_nz;
>  	unsigned int fill_device;
>  	unsigned int file_append;
>  	unsigned long long file_size_low;
>  	unsigned long long file_size_high;
>  	unsigned long long start_offset;
>  	unsigned long long start_offset_align;
> +	unsigned int start_offset_nz;
> 
>  	unsigned long long bs[DDIR_RWDIR_CNT];
>  	unsigned long long ba[DDIR_RWDIR_CNT];
> @@ -198,6 +201,7 @@ struct thread_options {
>  	unsigned long long zone_size;
>  	unsigned long long zone_capacity;
>  	unsigned long long zone_skip;
> +	uint32_t zone_skip_nz;
>  	enum fio_zone_mode zone_mode;
>  	unsigned long long lockmem;
>  	enum fio_memtype mem_type;
> @@ -315,6 +319,7 @@ struct thread_options {
>  	unsigned int gid;
> 
>  	unsigned int offset_increment_percent;
> +	unsigned int offset_increment_nz;
>  	unsigned long long offset_increment;
>  	unsigned long long number_ios;
> 
> @@ -384,14 +389,19 @@ struct thread_options_pack {
>  	uint64_t size;
>  	uint64_t io_size;
>  	uint32_t size_percent;
> +	uint32_t size_nz;
>  	uint32_t io_size_percent;
> +	uint32_t io_size_nz;
>  	uint32_t fill_device;
>  	uint32_t file_append;
>  	uint32_t unique_filename;
> +	uint32_t pad3;
>  	uint64_t file_size_low;
>  	uint64_t file_size_high;
>  	uint64_t start_offset;
>  	uint64_t start_offset_align;
> +	uint32_t start_offset_nz;
> +	uint32_t pad4;
> 
>  	uint64_t bs[DDIR_RWDIR_CNT];
>  	uint64_t ba[DDIR_RWDIR_CNT];
> @@ -464,8 +474,6 @@ struct thread_options_pack {
>  	struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
>  	uint32_t zone_split_nr[DDIR_RWDIR_CNT];
> 
> -	uint8_t pad1[4];
> -
>  	fio_fp64_t zipf_theta;
>  	fio_fp64_t pareto_h;
>  	fio_fp64_t gauss_dev;
> @@ -501,6 +509,7 @@ struct thread_options_pack {
>  	uint64_t zone_capacity;
>  	uint64_t zone_skip;
>  	uint64_t lockmem;
> +	uint32_t zone_skip_nz;
>  	uint32_t mem_type;
>  	uint32_t mem_align;
> 
> @@ -509,8 +518,6 @@ struct thread_options_pack {
>  	uint32_t new_group;
>  	uint32_t numjobs;
> 
> -	uint8_t pad3[4];
> -
>  	/*
>  	 * We currently can't convert these, so don't enable them
>  	 */
> @@ -616,12 +623,14 @@ struct thread_options_pack {
>  	uint32_t gid;
> 
>  	uint32_t offset_increment_percent;
> +	uint32_t offset_increment_nz;
>  	uint64_t offset_increment;
>  	uint64_t number_ios;
> 
>  	uint64_t latency_target;
>  	uint64_t latency_window;
>  	uint64_t max_latency;
> +	uint32_t pad5;
>  	fio_fp64_t latency_percentile;
>  	uint32_t latency_run;
> 
> --- a/zbd.c
> +++ b/zbd.c
> @@ -648,7 +648,7 @@ static bool zbd_open_zone(struct thread_data *td,
> const struct fio_file *f,
>  static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
>  			  struct fio_zone_info *z);
> 
> -int zbd_setup_files(struct thread_data *td)
> +int zbd_init_files(struct thread_data *td)
>  {
>  	struct fio_file *f;
>  	int i;
> @@ -657,6 +657,44 @@ int zbd_setup_files(struct thread_data *td)
>  		if (zbd_init_zone_info(td, f))
>  			return 1;
>  	}
> +	return 0;
> +}
> +
> +void zbd_recalc_options_with_zone_granularity(struct thread_data *td)
> +{
> +	struct fio_file *f;
> +	int i;
> +
> +	for_each_file(td, f, i) {
> +		struct zoned_block_device_info *zbd = f->zbd_info;
> +		// zonemode=strided doesn't get per-file zone size.
> +		uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
> +
> +		if (zone_size == 0)
> +			continue;
> +
> +		if (td->o.size_nz > 0) {
> +			td->o.size = td->o.size_nz * zone_size;
> +		}
> +		if (td->o.io_size_nz > 0) {
> +			td->o.io_size = td->o.io_size_nz * zone_size;
> +		}
> +		if (td->o.start_offset_nz > 0) {
> +			td->o.start_offset = td->o.start_offset_nz *
> zone_size;
> +		}
> +		if (td->o.offset_increment_nz > 0) {
> +			td->o.offset_increment = td-
> >o.offset_increment_nz * zone_size;
> +		}
> +		if (td->o.zone_skip_nz > 0) {
> +			td->o.zone_skip = td->o.zone_skip_nz * zone_size;
> +		}
> +	}
> +}
> +
> +int zbd_setup_files(struct thread_data *td)
> +{
> +	struct fio_file *f;
> +	int i;
> 
>  	if (!zbd_using_direct_io()) {
>  		log_err("Using direct I/O is mandatory for writing to ZBD
> drives\n\n");
> --- a/zbd.h
> +++ b/zbd.h
> @@ -87,6 +87,8 @@ struct zoned_block_device_info {
>  	struct fio_zone_info	zone_info[0];
>  };
> 
> +int zbd_init_files(struct thread_data *td);
> +void zbd_recalc_options_with_zone_granularity(struct thread_data *td);
>  int zbd_setup_files(struct thread_data *td);
>  void zbd_free_zone_info(struct fio_file *f);
>  void zbd_file_reset(struct thread_data *td, struct fio_file *f);

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-02-15  3:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-04  8:32 [PATCH] zbd: support 'z' suffix for zone granularity Alexey Dobriyan
2021-02-05  0:53 ` Dmitry Fomichev
2021-02-08 14:43   ` Alexey Dobriyan
2021-02-08 15:53     ` Alexey Dobriyan
2021-02-08 16:37   ` Alexey Dobriyan
2021-02-08 17:54   ` [PATCH v2] " Alexey Dobriyan
2021-02-15  3:40     ` Dmitry Fomichev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.