On Thu, May 07 2020, Sasha Levin wrote: > From: NeilBrown > > [ Upstream commit 7c4310ff56422ea43418305d22bbc5fe19150ec4 ] This one is buggy - it introduces a use-after-free. Best delay it for now. NeilBrown > > The rpciod workqueue is on the write-out path for freeing dirty memory, > so it is important that it never block waiting for memory to be > allocated - this can lead to a deadlock. > > rpc_execute() - which is often called by an rpciod work item - calls > rcp_task_release_client() which can lead to rpc_free_client(). > > rpc_free_client() makes two calls which could potentially block wating > for memory allocation. > > rpc_clnt_debugfs_unregister() calls into debugfs and will block while > any of the debugfs files are being accessed. In particular it can block > while any of the 'open' methods are being called and all of these use > malloc for one thing or another. So this can deadlock if the memory > allocation waits for NFS to complete some writes via rpciod. > > rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't > obvious that memory allocations can happen while the lock it held, it is > safer to assume they might and to not let rpciod call > rpc_clnt_remove_pipedir(). > > So this patch moves these two calls (together with the final kfree() and > rpciod_down()) into a work-item to be run from the system work-queue. > rpciod can continue its important work, and the final stages of the free > can happen whenever they happen. > > I have seen this deadlock on a 4.12 based kernel where debugfs used > synchronize_srcu() when removing objects. synchronize_srcu() requires a > workqueue and there were no free workther threads and none could be > allocated. While debugsfs no longer uses SRCU, I believe the deadlock > is still possible. > > Signed-off-by: NeilBrown > Signed-off-by: Trond Myklebust > Signed-off-by: Sasha Levin > --- > include/linux/sunrpc/clnt.h | 8 +++++++- > net/sunrpc/clnt.c | 21 +++++++++++++++++---- > 2 files changed, 24 insertions(+), 5 deletions(-) > > diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h > index abc63bd1be2b5..d99d39d45a494 100644 > --- a/include/linux/sunrpc/clnt.h > +++ b/include/linux/sunrpc/clnt.h > @@ -71,7 +71,13 @@ struct rpc_clnt { > #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) > struct dentry *cl_debugfs; /* debugfs directory */ > #endif > - struct rpc_xprt_iter cl_xpi; > + /* cl_work is only needed after cl_xpi is no longer used, > + * and that are of similar size > + */ > + union { > + struct rpc_xprt_iter cl_xpi; > + struct work_struct cl_work; > + }; > const struct cred *cl_cred; > }; > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c > index f7f78566be463..a7430b66c7389 100644 > --- a/net/sunrpc/clnt.c > +++ b/net/sunrpc/clnt.c > @@ -877,6 +877,20 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); > /* > * Free an RPC client > */ > +static void rpc_free_client_work(struct work_struct *work) > +{ > + struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); > + > + /* These might block on processes that might allocate memory, > + * so they cannot be called in rpciod, so they are handled separately > + * here. > + */ > + rpc_clnt_debugfs_unregister(clnt); > + rpc_clnt_remove_pipedir(clnt); > + > + kfree(clnt); > + rpciod_down(); > +} > static struct rpc_clnt * > rpc_free_client(struct rpc_clnt *clnt) > { > @@ -887,17 +901,16 @@ rpc_free_client(struct rpc_clnt *clnt) > rcu_dereference(clnt->cl_xprt)->servername); > if (clnt->cl_parent != clnt) > parent = clnt->cl_parent; > - rpc_clnt_debugfs_unregister(clnt); > - rpc_clnt_remove_pipedir(clnt); > rpc_unregister_client(clnt); > rpc_free_iostats(clnt->cl_metrics); > clnt->cl_metrics = NULL; > xprt_put(rcu_dereference_raw(clnt->cl_xprt)); > xprt_iter_destroy(&clnt->cl_xpi); > - rpciod_down(); > put_cred(clnt->cl_cred); > rpc_free_clid(clnt); > - kfree(clnt); > + > + INIT_WORK(&clnt->cl_work, rpc_free_client_work); > + schedule_work(&clnt->cl_work); > return parent; > } > > -- > 2.20.1