All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] fio: fix premature final dlclose of dynamic engine
@ 2021-01-25 19:13 Eric Sandeen
  2021-01-25 19:18 ` [PATCH 1/2] fio: move dynamic library handle to io_ops structure Eric Sandeen
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Sandeen @ 2021-01-25 19:13 UTC (permalink / raw)
  To: fio; +Cc: Sitsofe Wheeler, Yigal Korman, Richard W.M. Jones, Jens Axboe

When running a job like this, with external engines enabled:

[global]
ioengine=pmemblk
directory=/mnt/test
thread=1
direct=1
iodepth=1

[pmemblk-job2]
filename=pmemblk-smallrw,4096,17
rw=readwrite

[pmemblk-job2]
filename=pmemblk-randrw,4096,64
rw=randrw

it segfaults if the first thread to finish is the one that dlopened the engine,
and the 2nd thread has everything ripped out from under it:

# fio --debug=io pmemblk.fio
...
io       18361 close ioengine pmemblk
io       18362 prep: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 queue: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
io       18361 free ioengine pmemblk
io       18362 complete: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 fill: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18361 ioengine pmemblk unregistered
io       18362 prep: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 queue: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 complete: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 fill: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 prep: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 queue: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 complete: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 fill: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 prep: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
io       18362 queue: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
Segmentation fault (core dumped)

Fix this by keeping the dlhandle on the io engine itself, not the thread,
then explicitly dlopen for every thread that asks for the engine, and let
dlopen/dlclose do refcounting as designed.

I think this is right, and it fixes it for me, but good review would be wise!

Also: maybe the 2 patches could be collapsed, though I /think/ this makes review
a bit easier and doesn't add any regression at patch1 ...

Thanks,
-Eric



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] fio: move dynamic library handle to io_ops structure
  2021-01-25 19:13 [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Eric Sandeen
@ 2021-01-25 19:18 ` Eric Sandeen
  2021-01-25 19:23 ` [PATCH 2/2] fio: fix dlopen refcounting of dynamic engines Eric Sandeen
  2021-01-25 21:04 ` [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Sandeen @ 2021-01-25 19:18 UTC (permalink / raw)
  To: fio; +Cc: Sitsofe Wheeler, Yigal Korman, Richard W.M. Jones, Jens Axboe

Keeping a dynamic engine's dlopen'd dlhandle on a thread structure doesn't
make sense; that thread may exit while others are still using the engine.

Move the dlhandle onto the ops structure itself.

We still only call dlopen for the first thead, which leaves a refcounting
issue which will be fixed in the next patch.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

diff --git a/fio.h b/fio.h
index ee582a72..062abfa7 100644
--- a/fio.h
+++ b/fio.h
@@ -281,7 +281,6 @@ struct thread_data {
 	 * IO engine private data and dlhandle.
 	 */
 	void *io_ops_data;
-	void *io_ops_dlhandle;
 
 	/*
 	 * Queue depth of io_u's that fio MIGHT do
diff --git a/init.c b/init.c
index 1d14df16..ab38b334 100644
--- a/init.c
+++ b/init.c
@@ -1104,18 +1104,18 @@ int ioengine_load(struct thread_data *td)
 		 * for this name and see if they match. If they do, then
 		 * the engine is unchanged.
 		 */
-		dlhandle = td->io_ops_dlhandle;
+		dlhandle = td->io_ops->dlhandle;
 		ops = load_ioengine(td);
 		if (!ops)
 			goto fail;
 
-		if (ops == td->io_ops && dlhandle == td->io_ops_dlhandle) {
+		if (ops == td->io_ops && dlhandle == td->io_ops->dlhandle) {
 			if (dlhandle)
 				dlclose(dlhandle);
 			return 0;
 		}
 
-		if (dlhandle && dlhandle != td->io_ops_dlhandle)
+		if (dlhandle && dlhandle != td->io_ops->dlhandle)
 			dlclose(dlhandle);
 
 		/* Unload the old engine. */
diff --git a/ioengines.c b/ioengines.c
index 5ac512ae..dcc9496d 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -155,7 +155,7 @@ static struct ioengine_ops *dlopen_ioengine(struct thread_data *td,
 		return NULL;
 	}
 
-	td->io_ops_dlhandle = dlhandle;
+	ops->dlhandle = dlhandle;
 	return ops;
 }
 
@@ -228,9 +228,9 @@ void free_ioengine(struct thread_data *td)
 		td->eo = NULL;
 	}
 
-	if (td->io_ops_dlhandle) {
-		dlclose(td->io_ops_dlhandle);
-		td->io_ops_dlhandle = NULL;
+	if (td->io_ops->dlhandle) {
+		dlclose(td->io_ops->dlhandle);
+		td->io_ops->dlhandle = NULL;
 	}
 
 	td->io_ops = NULL;
diff --git a/ioengines.h b/ioengines.h
index a928b211..839b318d 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -8,7 +8,7 @@
 #include "io_u.h"
 #include "zbd_types.h"
 
-#define FIO_IOOPS_VERSION	27
+#define FIO_IOOPS_VERSION	28
 
 #ifndef CONFIG_DYNAMIC_ENGINES
 #define FIO_STATIC	static
@@ -30,6 +30,7 @@ struct ioengine_ops {
 	const char *name;
 	int version;
 	int flags;
+	void *dlhandle;
 	int (*setup)(struct thread_data *);
 	int (*init)(struct thread_data *);
 	int (*post_init)(struct thread_data *);




^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] fio: fix dlopen refcounting of dynamic engines
  2021-01-25 19:13 [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Eric Sandeen
  2021-01-25 19:18 ` [PATCH 1/2] fio: move dynamic library handle to io_ops structure Eric Sandeen
@ 2021-01-25 19:23 ` Eric Sandeen
  2021-01-25 21:04 ` [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Sandeen @ 2021-01-25 19:23 UTC (permalink / raw)
  To: fio; +Cc: Sitsofe Wheeler, Yigal Korman, Richard W.M. Jones, Jens Axboe

ioengine_load() will dlclose the dynamic library if it matches one
that we've already got open, but this defeats the built-in refcounting
done by dlopen/dlclose.  As each thread exits, it calls free_ioengine(),
and this may do a final dlclose on a dynamic ioengine that is still
in use if we don't have the proper reference count.

Fix this by dropping the explicit dlclose of a "matching" dlopened
dynamic engine library, and let each dlclose decrement the refcount
on the engine library as is normal.

This also adds/modifies a couple of debug messages to help track this.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

(maybe we should add FD_DYNAMIC instead of FD_IO for this?)

diff --git a/init.c b/init.c
index ab38b334..d6dbaf7c 100644
--- a/init.c
+++ b/init.c
@@ -1109,11 +1109,8 @@ int ioengine_load(struct thread_data *td)
 		if (!ops)
 			goto fail;
 
-		if (ops == td->io_ops && dlhandle == td->io_ops->dlhandle) {
-			if (dlhandle)
-				dlclose(dlhandle);
+		if (ops == td->io_ops && dlhandle == td->io_ops->dlhandle)
 			return 0;
-		}
 
 		if (dlhandle && dlhandle != td->io_ops->dlhandle)
 			dlclose(dlhandle);
diff --git a/ioengines.c b/ioengines.c
index dcc9496d..f88b0537 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -95,6 +95,7 @@ static void *dlopen_external(struct thread_data *td, const char *engine)
 
 	sprintf(engine_path, "%s/fio-%s.so", FIO_EXT_ENG_DIR, engine);
 
+	dprint(FD_IO, "dlopen external %s\n", engine_path);
 	dlhandle = dlopen(engine_path, RTLD_LAZY);
 	if (!dlhandle)
 		log_info("Engine %s not found; Either name is invalid, was not built, or fio-engine-%s package is missing.\n",
@@ -116,7 +117,7 @@ static struct ioengine_ops *dlopen_ioengine(struct thread_data *td,
 	    !strncmp(engine_lib, "aio", 3))
 		engine_lib = "libaio";
 
-	dprint(FD_IO, "dload engine %s\n", engine_lib);
+	dprint(FD_IO, "dlopen engine %s\n", engine_lib);
 
 	dlerror();
 	dlhandle = dlopen(engine_lib, RTLD_LAZY);
@@ -194,7 +195,9 @@ struct ioengine_ops *load_ioengine(struct thread_data *td)
 	 * so as not to break job files not using the prefix.
 	 */
 	ops = __load_ioengine(td->o.ioengine);
-	if (!ops)
+
+	/* We do re-dlopen existing handles, for reference counting */
+	if (!ops || ops->dlhandle)
 		ops = dlopen_ioengine(td, name);
 
 	/*
@@ -229,6 +232,7 @@ void free_ioengine(struct thread_data *td)
 	}
 
 	if (td->io_ops->dlhandle) {
+		dprint(FD_IO, "dlclose ioengine %s\n", td->io_ops->name);
 		dlclose(td->io_ops->dlhandle);
 		td->io_ops->dlhandle = NULL;
 	}




^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/2] fio: fix premature final dlclose of dynamic engine
  2021-01-25 19:13 [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Eric Sandeen
  2021-01-25 19:18 ` [PATCH 1/2] fio: move dynamic library handle to io_ops structure Eric Sandeen
  2021-01-25 19:23 ` [PATCH 2/2] fio: fix dlopen refcounting of dynamic engines Eric Sandeen
@ 2021-01-25 21:04 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2021-01-25 21:04 UTC (permalink / raw)
  To: Eric Sandeen, fio; +Cc: Sitsofe Wheeler, Yigal Korman, Richard W.M. Jones

On 1/25/21 12:13 PM, Eric Sandeen wrote:
> When running a job like this, with external engines enabled:
> 
> [global]
> ioengine=pmemblk
> directory=/mnt/test
> thread=1
> direct=1
> iodepth=1
> 
> [pmemblk-job2]
> filename=pmemblk-smallrw,4096,17
> rw=readwrite
> 
> [pmemblk-job2]
> filename=pmemblk-randrw,4096,64
> rw=randrw
> 
> it segfaults if the first thread to finish is the one that dlopened the engine,
> and the 2nd thread has everything ripped out from under it:
> 
> # fio --debug=io pmemblk.fio
> ...
> io       18361 close ioengine pmemblk
> io       18362 prep: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 queue: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
> io       18361 free ioengine pmemblk
> io       18362 complete: io_u 0x285e1c0: off=0x2108000,len=0x1000,ddir=1,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 fill: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18361 ioengine pmemblk unregistered
> io       18362 prep: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 queue: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 complete: io_u 0x285e1c0: off=0x1bcc000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 fill: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 prep: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 queue: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 complete: io_u 0x285e1c0: off=0xe91000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 fill: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 prep: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> io       18362 queue: io_u 0x285e1c0: off=0x3413000,len=0x1000,ddir=0,file=/mnt/test/pmemblk-randrw,4096,64
> Segmentation fault (core dumped)
> 
> Fix this by keeping the dlhandle on the io engine itself, not the thread,
> then explicitly dlopen for every thread that asks for the engine, and let
> dlopen/dlclose do refcounting as designed.
> 
> I think this is right, and it fixes it for me, but good review would be wise!
> 
> Also: maybe the 2 patches could be collapsed, though I /think/ this makes review
> a bit easier and doesn't add any regression at patch1 ...

Applied them separately, thanks.

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-01-25 21:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-25 19:13 [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Eric Sandeen
2021-01-25 19:18 ` [PATCH 1/2] fio: move dynamic library handle to io_ops structure Eric Sandeen
2021-01-25 19:23 ` [PATCH 2/2] fio: fix dlopen refcounting of dynamic engines Eric Sandeen
2021-01-25 21:04 ` [PATCH 0/2] fio: fix premature final dlclose of dynamic engine Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.