All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use
@ 2023-05-08 23:41 NeilBrown
  2023-05-08 23:42 ` [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request NeilBrown
  2023-05-09 14:02 ` [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use Jeff Layton
  0 siblings, 2 replies; 6+ messages in thread
From: NeilBrown @ 2023-05-08 23:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton; +Cc: linux-nfs


When an RPC request is deferred, the rq_xprt_ctxt pointer is moved out
of the svc_rqst into the svc_deferred_req.
When the deferred request is revisited, the pointer is copied into
the new svc_rqst - and also remains in the svc_deferred_req.

In the (rare?) case that the request is deferred a second time, the old
svc_deferred_req is reused - it still has all the correct content.
However in that case the rq_xprt_ctxt pointer is NOT cleared so that
when xpo_release_xprt is called, the ctxt is freed (UDP) or possible
added to a free list (RDMA).
When the deferred request is revisited for a second time, it will
reference this ctxt which may be invalid, and the free the object a
second time which is likely to oops.

So change svc_defer() to *always* clear rq_xprt_ctxt, and assert that
the value is now stored in the svc_deferred_req.

Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
Signed-off-by: NeilBrown <neilb@suse.de>
---
 net/sunrpc/svc_xprt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 84e5d7d31481..5fd94f6bdc75 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1223,13 +1223,14 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 		dr->daddr = rqstp->rq_daddr;
 		dr->argslen = rqstp->rq_arg.len >> 2;
 		dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
-		rqstp->rq_xprt_ctxt = NULL;
 
 		/* back up head to the start of the buffer and copy */
 		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
 		       dr->argslen << 2);
 	}
+	WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
+	rqstp->rq_xprt_ctxt = NULL;
 	trace_svc_defer(rqstp);
 	svc_xprt_get(rqstp->rq_xprt);
 	dr->xprt = rqstp->rq_xprt;
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request
  2023-05-08 23:41 [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use NeilBrown
@ 2023-05-08 23:42 ` NeilBrown
  2023-05-09 14:11   ` Jeff Layton
  2023-05-09 14:31   ` Chuck Lever III
  2023-05-09 14:02 ` [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use Jeff Layton
  1 sibling, 2 replies; 6+ messages in thread
From: NeilBrown @ 2023-05-08 23:42 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton; +Cc: linux-nfs


Since the ->xprt_ctxt pointer was added to svc_deferred_req, it has not
been sufficient to use kfree() to free a deferred request.  We may need
to free the ctxt as well.

As freeing the ctxt is all that ->xpo_release_rqst() does, we repurpose
it to explicit do that even when the ctxt is not stored in an rqst.
So we now have ->xpo_release_ctxt() which is given an xprt and a ctxt,
which may have been taken either from an rqst or from a dreq.  The
caller is now responsible for clearing that pointer after the call to
->xpo_release_ctxt.

We also clear dr->xprt_ctxt when the ctxt is moved into a new rqst when
revisiting a deferred request.  This ensures there is only one pointer
to the ctxt, so the risk of double freeing in future is reduced.  The
new code in svc_xprt_release which releases both the ctxt and any
rq_deferred depends on this.

Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sunrpc/svc_rdma.h          |  2 +-
 include/linux/sunrpc/svc_xprt.h          |  2 +-
 net/sunrpc/svc_xprt.c                    | 21 ++++++++++++-----
 net/sunrpc/svcsock.c                     | 30 +++++++++++++-----------
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 11 ++++-----
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  2 +-
 6 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 24aa159d29a7..fbc4bd423b35 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -176,7 +176,7 @@ extern struct svc_rdma_recv_ctxt *
 extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 				   struct svc_rdma_recv_ctxt *ctxt);
 extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
-extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
+extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
 extern int svc_rdma_recvfrom(struct svc_rqst *);
 
 /* svc_rdma_rw.c */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 867479204840..6f4473ee68e1 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -23,7 +23,7 @@ struct svc_xprt_ops {
 	int		(*xpo_sendto)(struct svc_rqst *);
 	int		(*xpo_result_payload)(struct svc_rqst *, unsigned int,
 					      unsigned int);
-	void		(*xpo_release_rqst)(struct svc_rqst *);
+	void		(*xpo_release_ctxt)(struct svc_xprt *, void *);
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
 	void		(*xpo_kill_temp_xprt)(struct svc_xprt *);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5fd94f6bdc75..1e3bba433561 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -532,13 +532,21 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 }
 EXPORT_SYMBOL_GPL(svc_reserve);
 
+static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
+{
+	if (dr)
+		xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
+	kfree(dr);
+}
+
 static void svc_xprt_release(struct svc_rqst *rqstp)
 {
 	struct svc_xprt	*xprt = rqstp->rq_xprt;
 
-	xprt->xpt_ops->xpo_release_rqst(rqstp);
+	xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+	rqstp->rq_xprt_ctxt = NULL;
 
-	kfree(rqstp->rq_deferred);
+	free_deferred(xprt, rqstp->rq_deferred);
 	rqstp->rq_deferred = NULL;
 
 	svc_rqst_release_pages(rqstp);
@@ -1054,7 +1062,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
 	spin_unlock_bh(&serv->sv_lock);
 
 	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
-		kfree(dr);
+		free_deferred(xprt, dr);
 
 	call_xpt_users(xprt);
 	svc_xprt_put(xprt);
@@ -1176,8 +1184,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		spin_unlock(&xprt->xpt_lock);
 		trace_svc_defer_drop(dr);
+		free_deferred(xprt, dr);
 		svc_xprt_put(xprt);
-		kfree(dr);
 		return;
 	}
 	dr->xprt = NULL;
@@ -1222,14 +1230,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 		dr->addrlen = rqstp->rq_addrlen;
 		dr->daddr = rqstp->rq_daddr;
 		dr->argslen = rqstp->rq_arg.len >> 2;
-		dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
 
 		/* back up head to the start of the buffer and copy */
 		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
 		       dr->argslen << 2);
 	}
-	WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
+	dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
 	rqstp->rq_xprt_ctxt = NULL;
 	trace_svc_defer(rqstp);
 	svc_xprt_get(rqstp->rq_xprt);
@@ -1263,6 +1270,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
 	rqstp->rq_daddr       = dr->daddr;
 	rqstp->rq_respages    = rqstp->rq_pages;
 	rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
+
+	dr->xprt_ctxt = NULL;
 	svc_xprt_received(rqstp->rq_xprt);
 	return dr->argslen << 2;
 }
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a51c9b989d58..aa4f31a770e3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -121,27 +121,27 @@ static void svc_reclassify_socket(struct socket *sock)
 #endif
 
 /**
- * svc_tcp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_tcp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
  *
  */
-static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
+static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
 {
 }
 
 /**
- * svc_udp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_udp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
  *
  */
-static void svc_udp_release_rqst(struct svc_rqst *rqstp)
+static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
 {
-	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+	struct sk_buff *skb = ctxt;
 
-	if (skb) {
-		rqstp->rq_xprt_ctxt = NULL;
+	if (skb)
 		consume_skb(skb);
-	}
 }
 
 union svc_pktinfo_u {
@@ -696,7 +696,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 	unsigned int sent;
 	int err;
 
-	svc_udp_release_rqst(rqstp);
+	svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+	rqstp->rq_xprt_ctxt = NULL;
 
 	svc_set_cmsg_data(rqstp, cmh);
 
@@ -768,7 +769,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
 	.xpo_result_payload = svc_sock_result_payload,
-	.xpo_release_rqst = svc_udp_release_rqst,
+	.xpo_release_ctxt = svc_udp_release_ctxt,
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
 	.xpo_has_wspace = svc_udp_has_wspace,
@@ -1298,7 +1299,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 	unsigned int sent;
 	int err;
 
-	svc_tcp_release_rqst(rqstp);
+	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+	rqstp->rq_xprt_ctxt = NULL;
 
 	atomic_inc(&svsk->sk_sendqlen);
 	mutex_lock(&xprt->xpt_mutex);
@@ -1343,7 +1345,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
 	.xpo_result_payload = svc_sock_result_payload,
-	.xpo_release_rqst = svc_tcp_release_rqst,
+	.xpo_release_ctxt = svc_tcp_release_ctxt,
 	.xpo_detach = svc_tcp_sock_detach,
 	.xpo_free = svc_sock_free,
 	.xpo_has_wspace = svc_tcp_has_wspace,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 1c658fa43063..5c51e28b3111 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -239,21 +239,20 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 }
 
 /**
- * svc_rdma_release_rqst - Release transport-specific per-rqst resources
- * @rqstp: svc_rqst being released
+ * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
  *
  * Ensure that the recv_ctxt is released whether or not a Reply
  * was sent. For example, the client could close the connection,
  * or svc_process could drop an RPC, before the Reply is sent.
  */
-void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
 {
-	struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
-	struct svc_xprt *xprt = rqstp->rq_xprt;
+	struct svc_rdma_recv_ctxt *ctxt = vctxt;
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
-	rqstp->rq_xprt_ctxt = NULL;
 	if (ctxt)
 		svc_rdma_recv_ctxt_put(rdma, ctxt);
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 416b298f74dd..ca04f7a6a085 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_recvfrom = svc_rdma_recvfrom,
 	.xpo_sendto = svc_rdma_sendto,
 	.xpo_result_payload = svc_rdma_result_payload,
-	.xpo_release_rqst = svc_rdma_release_rqst,
+	.xpo_release_ctxt = svc_rdma_release_ctxt,
 	.xpo_detach = svc_rdma_detach,
 	.xpo_free = svc_rdma_free,
 	.xpo_has_wspace = svc_rdma_has_wspace,
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use
  2023-05-08 23:41 [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use NeilBrown
  2023-05-08 23:42 ` [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request NeilBrown
@ 2023-05-09 14:02 ` Jeff Layton
  1 sibling, 0 replies; 6+ messages in thread
From: Jeff Layton @ 2023-05-09 14:02 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever, Jeff Layton; +Cc: linux-nfs

On Tue, 2023-05-09 at 09:41 +1000, NeilBrown wrote:
> When an RPC request is deferred, the rq_xprt_ctxt pointer is moved out
> of the svc_rqst into the svc_deferred_req.
> When the deferred request is revisited, the pointer is copied into
> the new svc_rqst - and also remains in the svc_deferred_req.
> 
> In the (rare?) case that the request is deferred a second time, the old
> svc_deferred_req is reused - it still has all the correct content.
> However in that case the rq_xprt_ctxt pointer is NOT cleared so that
> when xpo_release_xprt is called, the ctxt is freed (UDP) or possible
> added to a free list (RDMA).
> When the deferred request is revisited for a second time, it will
> reference this ctxt which may be invalid, and the free the object a
> second time which is likely to oops.
> 

I've always found the deferral code to be really hard to follow. The
dearth of comments around the design doesn't help either...

To be clear, when we call into svc_defer, if rq_deferred is already
set, then that means that we're revisiting a request that has already
been deferred at least once?
                     
> So change svc_defer() to *always* clear rq_xprt_ctxt, and assert that
> the value is now stored in the svc_deferred_req.
> 
> Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  net/sunrpc/svc_xprt.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 84e5d7d31481..5fd94f6bdc75 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -1223,13 +1223,14 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
>  		dr->daddr = rqstp->rq_daddr;
>  		dr->argslen = rqstp->rq_arg.len >> 2;
>  		dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
> -		rqstp->rq_xprt_ctxt = NULL;
>  
>  		/* back up head to the start of the buffer and copy */
>  		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
>  		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
>  		       dr->argslen << 2);
>  	}
> +	WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
> +	rqstp->rq_xprt_ctxt = NULL;
>  	trace_svc_defer(rqstp);
>  	svc_xprt_get(rqstp->rq_xprt);
>  	dr->xprt = rqstp->rq_xprt;

I think this looks right, assuming my understanding of what
rq_deferred == NULL indicates.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request
  2023-05-08 23:42 ` [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request NeilBrown
@ 2023-05-09 14:11   ` Jeff Layton
  2023-05-09 20:49     ` NeilBrown
  2023-05-09 14:31   ` Chuck Lever III
  1 sibling, 1 reply; 6+ messages in thread
From: Jeff Layton @ 2023-05-09 14:11 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs

On Tue, 2023-05-09 at 09:42 +1000, NeilBrown wrote:
> Since the ->xprt_ctxt pointer was added to svc_deferred_req, it has not
> been sufficient to use kfree() to free a deferred request.  We may need
> to free the ctxt as well.
> 
> As freeing the ctxt is all that ->xpo_release_rqst() does, we repurpose
> it to explicit do that even when the ctxt is not stored in an rqst.
> So we now have ->xpo_release_ctxt() which is given an xprt and a ctxt,
> which may have been taken either from an rqst or from a dreq.  The
> caller is now responsible for clearing that pointer after the call to
> ->xpo_release_ctxt.
> 
> We also clear dr->xprt_ctxt when the ctxt is moved into a new rqst when
> revisiting a deferred request.  This ensures there is only one pointer
> to the ctxt, so the risk of double freeing in future is reduced.  The
> new code in svc_xprt_release which releases both the ctxt and any
> rq_deferred depends on this.
> 

Thank you. Leaving stray pointers around like that is just asking for
trouble.

> Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  include/linux/sunrpc/svc_rdma.h          |  2 +-
>  include/linux/sunrpc/svc_xprt.h          |  2 +-
>  net/sunrpc/svc_xprt.c                    | 21 ++++++++++++-----
>  net/sunrpc/svcsock.c                     | 30 +++++++++++++-----------
>  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 11 ++++-----
>  net/sunrpc/xprtrdma/svc_rdma_transport.c |  2 +-
>  6 files changed, 39 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index 24aa159d29a7..fbc4bd423b35 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -176,7 +176,7 @@ extern struct svc_rdma_recv_ctxt *
>  extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
>  				   struct svc_rdma_recv_ctxt *ctxt);
>  extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
> -extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
> +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
>  extern int svc_rdma_recvfrom(struct svc_rqst *);
>  
>  /* svc_rdma_rw.c */
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index 867479204840..6f4473ee68e1 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -23,7 +23,7 @@ struct svc_xprt_ops {
>  	int		(*xpo_sendto)(struct svc_rqst *);
>  	int		(*xpo_result_payload)(struct svc_rqst *, unsigned int,
>  					      unsigned int);
> -	void		(*xpo_release_rqst)(struct svc_rqst *);
> +	void		(*xpo_release_ctxt)(struct svc_xprt *, void *);
>  	void		(*xpo_detach)(struct svc_xprt *);
>  	void		(*xpo_free)(struct svc_xprt *);
>  	void		(*xpo_kill_temp_xprt)(struct svc_xprt *);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 5fd94f6bdc75..1e3bba433561 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -532,13 +532,21 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
>  }
>  EXPORT_SYMBOL_GPL(svc_reserve);
>  
> +static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
> +{
> +	if (dr)
> +		xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
> +	kfree(dr);

nit: might as well put the kfree inside the if block to avoid it in the
common case of dr == NULL.

> +}
> +
>  static void svc_xprt_release(struct svc_rqst *rqstp)
>  {
>  	struct svc_xprt	*xprt = rqstp->rq_xprt;
>  
> -	xprt->xpt_ops->xpo_release_rqst(rqstp);
> +	xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> +	rqstp->rq_xprt_ctxt = NULL;
>  
> -	kfree(rqstp->rq_deferred);
> +	free_deferred(xprt, rqstp->rq_deferred);
>  	rqstp->rq_deferred = NULL;
>  
>  	svc_rqst_release_pages(rqstp);
> @@ -1054,7 +1062,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
>  	spin_unlock_bh(&serv->sv_lock);
>  
>  	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
> -		kfree(dr);
> +		free_deferred(xprt, dr);
>  
>  	call_xpt_users(xprt);
>  	svc_xprt_put(xprt);
> @@ -1176,8 +1184,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
>  	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
>  		spin_unlock(&xprt->xpt_lock);
>  		trace_svc_defer_drop(dr);
> +		free_deferred(xprt, dr);
>  		svc_xprt_put(xprt);
> -		kfree(dr);
>  		return;
>  	}
>  	dr->xprt = NULL;
> @@ -1222,14 +1230,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
>  		dr->addrlen = rqstp->rq_addrlen;
>  		dr->daddr = rqstp->rq_daddr;
>  		dr->argslen = rqstp->rq_arg.len >> 2;
> -		dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
>  
>  		/* back up head to the start of the buffer and copy */
>  		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
>  		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
>  		       dr->argslen << 2);
>  	}
> -	WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
> +	dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
>  	rqstp->rq_xprt_ctxt = NULL;
>  	trace_svc_defer(rqstp);
>  	svc_xprt_get(rqstp->rq_xprt);
> @@ -1263,6 +1270,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
>  	rqstp->rq_daddr       = dr->daddr;
>  	rqstp->rq_respages    = rqstp->rq_pages;
>  	rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
> +
> +	dr->xprt_ctxt = NULL;
>  	svc_xprt_received(rqstp->rq_xprt);
>  	return dr->argslen << 2;
>  }
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index a51c9b989d58..aa4f31a770e3 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -121,27 +121,27 @@ static void svc_reclassify_socket(struct socket *sock)
>  #endif
>  
>  /**
> - * svc_tcp_release_rqst - Release transport-related resources
> - * @rqstp: request structure with resources to be released
> + * svc_tcp_release_ctxt - Release transport-related resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>   *
>   */
> -static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
> +static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
>  {
>  }
>  
>  /**
> - * svc_udp_release_rqst - Release transport-related resources
> - * @rqstp: request structure with resources to be released
> + * svc_udp_release_ctxt - Release transport-related resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>   *
>   */
> -static void svc_udp_release_rqst(struct svc_rqst *rqstp)
> +static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
>  {
> -	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
> +	struct sk_buff *skb = ctxt;
>  
> -	if (skb) {
> -		rqstp->rq_xprt_ctxt = NULL;
> +	if (skb)
>  		consume_skb(skb);
> -	}
>  }
>  
>  union svc_pktinfo_u {
> @@ -696,7 +696,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
>  	unsigned int sent;
>  	int err;
>  
> -	svc_udp_release_rqst(rqstp);
> +	svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> +	rqstp->rq_xprt_ctxt = NULL;
>  
>  	svc_set_cmsg_data(rqstp, cmh);
>  
> @@ -768,7 +769,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
>  	.xpo_recvfrom = svc_udp_recvfrom,
>  	.xpo_sendto = svc_udp_sendto,
>  	.xpo_result_payload = svc_sock_result_payload,
> -	.xpo_release_rqst = svc_udp_release_rqst,
> +	.xpo_release_ctxt = svc_udp_release_ctxt,
>  	.xpo_detach = svc_sock_detach,
>  	.xpo_free = svc_sock_free,
>  	.xpo_has_wspace = svc_udp_has_wspace,
> @@ -1298,7 +1299,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
>  	unsigned int sent;
>  	int err;
>  
> -	svc_tcp_release_rqst(rqstp);
> +	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> +	rqstp->rq_xprt_ctxt = NULL;
>  
>  	atomic_inc(&svsk->sk_sendqlen);
>  	mutex_lock(&xprt->xpt_mutex);
> @@ -1343,7 +1345,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_recvfrom = svc_tcp_recvfrom,
>  	.xpo_sendto = svc_tcp_sendto,
>  	.xpo_result_payload = svc_sock_result_payload,
> -	.xpo_release_rqst = svc_tcp_release_rqst,
> +	.xpo_release_ctxt = svc_tcp_release_ctxt,
>  	.xpo_detach = svc_tcp_sock_detach,
>  	.xpo_free = svc_sock_free,
>  	.xpo_has_wspace = svc_tcp_has_wspace,
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> index 1c658fa43063..5c51e28b3111 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> @@ -239,21 +239,20 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
>  }
>  
>  /**
> - * svc_rdma_release_rqst - Release transport-specific per-rqst resources
> - * @rqstp: svc_rqst being released
> + * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>   *
>   * Ensure that the recv_ctxt is released whether or not a Reply
>   * was sent. For example, the client could close the connection,
>   * or svc_process could drop an RPC, before the Reply is sent.
>   */
> -void svc_rdma_release_rqst(struct svc_rqst *rqstp)
> +void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
>  {
> -	struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
> -	struct svc_xprt *xprt = rqstp->rq_xprt;
> +	struct svc_rdma_recv_ctxt *ctxt = vctxt;
>  	struct svcxprt_rdma *rdma =
>  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
>  
> -	rqstp->rq_xprt_ctxt = NULL;
>  	if (ctxt)
>  		svc_rdma_recv_ctxt_put(rdma, ctxt);
>  }
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index 416b298f74dd..ca04f7a6a085 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
>  	.xpo_recvfrom = svc_rdma_recvfrom,
>  	.xpo_sendto = svc_rdma_sendto,
>  	.xpo_result_payload = svc_rdma_result_payload,
> -	.xpo_release_rqst = svc_rdma_release_rqst,
> +	.xpo_release_ctxt = svc_rdma_release_ctxt,
>  	.xpo_detach = svc_rdma_detach,
>  	.xpo_free = svc_rdma_free,
>  	.xpo_has_wspace = svc_rdma_has_wspace,

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request
  2023-05-08 23:42 ` [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request NeilBrown
  2023-05-09 14:11   ` Jeff Layton
@ 2023-05-09 14:31   ` Chuck Lever III
  1 sibling, 0 replies; 6+ messages in thread
From: Chuck Lever III @ 2023-05-09 14:31 UTC (permalink / raw)
  To: Neil Brown; +Cc: Jeff Layton, Linux NFS Mailing List



> On May 8, 2023, at 4:42 PM, NeilBrown <neilb@suse.de> wrote:
> 
> 
> Since the ->xprt_ctxt pointer was added to svc_deferred_req, it has not
> been sufficient to use kfree() to free a deferred request.  We may need
> to free the ctxt as well.
> 
> As freeing the ctxt is all that ->xpo_release_rqst() does, we repurpose
> it to explicit do that even when the ctxt is not stored in an rqst.
> So we now have ->xpo_release_ctxt() which is given an xprt and a ctxt,
> which may have been taken either from an rqst or from a dreq.  The
> caller is now responsible for clearing that pointer after the call to
> ->xpo_release_ctxt.
> 
> We also clear dr->xprt_ctxt when the ctxt is moved into a new rqst when
> revisiting a deferred request.  This ensures there is only one pointer
> to the ctxt, so the risk of double freeing in future is reduced.  The
> new code in svc_xprt_release which releases both the ctxt and any
> rq_deferred depends on this.
> 
> Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
> Signed-off-by: NeilBrown <neilb@suse.de>

I haven't yet looked closely at these, but they look clean
and sensible at first glance. I'll apply them to nfsd-fixes
with Jeff's suggestion and Reviewed-by.


> ---
> include/linux/sunrpc/svc_rdma.h          |  2 +-
> include/linux/sunrpc/svc_xprt.h          |  2 +-
> net/sunrpc/svc_xprt.c                    | 21 ++++++++++++-----
> net/sunrpc/svcsock.c                     | 30 +++++++++++++-----------
> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 11 ++++-----
> net/sunrpc/xprtrdma/svc_rdma_transport.c |  2 +-
> 6 files changed, 39 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> index 24aa159d29a7..fbc4bd423b35 100644
> --- a/include/linux/sunrpc/svc_rdma.h
> +++ b/include/linux/sunrpc/svc_rdma.h
> @@ -176,7 +176,7 @@ extern struct svc_rdma_recv_ctxt *
> extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
>   struct svc_rdma_recv_ctxt *ctxt);
> extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
> -extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
> +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
> extern int svc_rdma_recvfrom(struct svc_rqst *);
> 
> /* svc_rdma_rw.c */
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index 867479204840..6f4473ee68e1 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -23,7 +23,7 @@ struct svc_xprt_ops {
> int (*xpo_sendto)(struct svc_rqst *);
> int (*xpo_result_payload)(struct svc_rqst *, unsigned int,
>      unsigned int);
> - void (*xpo_release_rqst)(struct svc_rqst *);
> + void (*xpo_release_ctxt)(struct svc_xprt *, void *);
> void (*xpo_detach)(struct svc_xprt *);
> void (*xpo_free)(struct svc_xprt *);
> void (*xpo_kill_temp_xprt)(struct svc_xprt *);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 5fd94f6bdc75..1e3bba433561 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -532,13 +532,21 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
> }
> EXPORT_SYMBOL_GPL(svc_reserve);
> 
> +static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
> +{
> + if (dr)
> + xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
> + kfree(dr);
> +}
> +
> static void svc_xprt_release(struct svc_rqst *rqstp)
> {
> struct svc_xprt *xprt = rqstp->rq_xprt;
> 
> - xprt->xpt_ops->xpo_release_rqst(rqstp);
> + xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> + rqstp->rq_xprt_ctxt = NULL;
> 
> - kfree(rqstp->rq_deferred);
> + free_deferred(xprt, rqstp->rq_deferred);
> rqstp->rq_deferred = NULL;
> 
> svc_rqst_release_pages(rqstp);
> @@ -1054,7 +1062,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
> spin_unlock_bh(&serv->sv_lock);
> 
> while ((dr = svc_deferred_dequeue(xprt)) != NULL)
> - kfree(dr);
> + free_deferred(xprt, dr);
> 
> call_xpt_users(xprt);
> svc_xprt_put(xprt);
> @@ -1176,8 +1184,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
> spin_unlock(&xprt->xpt_lock);
> trace_svc_defer_drop(dr);
> + free_deferred(xprt, dr);
> svc_xprt_put(xprt);
> - kfree(dr);
> return;
> }
> dr->xprt = NULL;
> @@ -1222,14 +1230,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> dr->addrlen = rqstp->rq_addrlen;
> dr->daddr = rqstp->rq_daddr;
> dr->argslen = rqstp->rq_arg.len >> 2;
> - dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
> 
> /* back up head to the start of the buffer and copy */
> skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
> memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
>       dr->argslen << 2);
> }
> - WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
> + dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
> rqstp->rq_xprt_ctxt = NULL;
> trace_svc_defer(rqstp);
> svc_xprt_get(rqstp->rq_xprt);
> @@ -1263,6 +1270,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
> rqstp->rq_daddr       = dr->daddr;
> rqstp->rq_respages    = rqstp->rq_pages;
> rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
> +
> + dr->xprt_ctxt = NULL;
> svc_xprt_received(rqstp->rq_xprt);
> return dr->argslen << 2;
> }
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index a51c9b989d58..aa4f31a770e3 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -121,27 +121,27 @@ static void svc_reclassify_socket(struct socket *sock)
> #endif
> 
> /**
> - * svc_tcp_release_rqst - Release transport-related resources
> - * @rqstp: request structure with resources to be released
> + * svc_tcp_release_ctxt - Release transport-related resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>  *
>  */
> -static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
> +static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
> {
> }
> 
> /**
> - * svc_udp_release_rqst - Release transport-related resources
> - * @rqstp: request structure with resources to be released
> + * svc_udp_release_ctxt - Release transport-related resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>  *
>  */
> -static void svc_udp_release_rqst(struct svc_rqst *rqstp)
> +static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
> {
> - struct sk_buff *skb = rqstp->rq_xprt_ctxt;
> + struct sk_buff *skb = ctxt;
> 
> - if (skb) {
> - rqstp->rq_xprt_ctxt = NULL;
> + if (skb)
> consume_skb(skb);
> - }
> }
> 
> union svc_pktinfo_u {
> @@ -696,7 +696,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
> unsigned int sent;
> int err;
> 
> - svc_udp_release_rqst(rqstp);
> + svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> + rqstp->rq_xprt_ctxt = NULL;
> 
> svc_set_cmsg_data(rqstp, cmh);
> 
> @@ -768,7 +769,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
> .xpo_recvfrom = svc_udp_recvfrom,
> .xpo_sendto = svc_udp_sendto,
> .xpo_result_payload = svc_sock_result_payload,
> - .xpo_release_rqst = svc_udp_release_rqst,
> + .xpo_release_ctxt = svc_udp_release_ctxt,
> .xpo_detach = svc_sock_detach,
> .xpo_free = svc_sock_free,
> .xpo_has_wspace = svc_udp_has_wspace,
> @@ -1298,7 +1299,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
> unsigned int sent;
> int err;
> 
> - svc_tcp_release_rqst(rqstp);
> + svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> + rqstp->rq_xprt_ctxt = NULL;
> 
> atomic_inc(&svsk->sk_sendqlen);
> mutex_lock(&xprt->xpt_mutex);
> @@ -1343,7 +1345,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
> .xpo_recvfrom = svc_tcp_recvfrom,
> .xpo_sendto = svc_tcp_sendto,
> .xpo_result_payload = svc_sock_result_payload,
> - .xpo_release_rqst = svc_tcp_release_rqst,
> + .xpo_release_ctxt = svc_tcp_release_ctxt,
> .xpo_detach = svc_tcp_sock_detach,
> .xpo_free = svc_sock_free,
> .xpo_has_wspace = svc_tcp_has_wspace,
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> index 1c658fa43063..5c51e28b3111 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> @@ -239,21 +239,20 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
> }
> 
> /**
> - * svc_rdma_release_rqst - Release transport-specific per-rqst resources
> - * @rqstp: svc_rqst being released
> + * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
> + * @xprt: the transport which owned the context
> + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
>  *
>  * Ensure that the recv_ctxt is released whether or not a Reply
>  * was sent. For example, the client could close the connection,
>  * or svc_process could drop an RPC, before the Reply is sent.
>  */
> -void svc_rdma_release_rqst(struct svc_rqst *rqstp)
> +void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
> {
> - struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
> - struct svc_xprt *xprt = rqstp->rq_xprt;
> + struct svc_rdma_recv_ctxt *ctxt = vctxt;
> struct svcxprt_rdma *rdma =
> container_of(xprt, struct svcxprt_rdma, sc_xprt);
> 
> - rqstp->rq_xprt_ctxt = NULL;
> if (ctxt)
> svc_rdma_recv_ctxt_put(rdma, ctxt);
> }
> diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> index 416b298f74dd..ca04f7a6a085 100644
> --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
> .xpo_recvfrom = svc_rdma_recvfrom,
> .xpo_sendto = svc_rdma_sendto,
> .xpo_result_payload = svc_rdma_result_payload,
> - .xpo_release_rqst = svc_rdma_release_rqst,
> + .xpo_release_ctxt = svc_rdma_release_ctxt,
> .xpo_detach = svc_rdma_detach,
> .xpo_free = svc_rdma_free,
> .xpo_has_wspace = svc_rdma_has_wspace,
> -- 
> 2.40.1
> 

--
Chuck Lever



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request
  2023-05-09 14:11   ` Jeff Layton
@ 2023-05-09 20:49     ` NeilBrown
  0 siblings, 0 replies; 6+ messages in thread
From: NeilBrown @ 2023-05-09 20:49 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Chuck Lever, linux-nfs

On Wed, 10 May 2023, Jeff Layton wrote:
> On Tue, 2023-05-09 at 09:42 +1000, NeilBrown wrote:
> > Since the ->xprt_ctxt pointer was added to svc_deferred_req, it has not
> > been sufficient to use kfree() to free a deferred request.  We may need
> > to free the ctxt as well.
> > 
> > As freeing the ctxt is all that ->xpo_release_rqst() does, we repurpose
> > it to explicit do that even when the ctxt is not stored in an rqst.
> > So we now have ->xpo_release_ctxt() which is given an xprt and a ctxt,
> > which may have been taken either from an rqst or from a dreq.  The
> > caller is now responsible for clearing that pointer after the call to
> > ->xpo_release_ctxt.
> > 
> > We also clear dr->xprt_ctxt when the ctxt is moved into a new rqst when
> > revisiting a deferred request.  This ensures there is only one pointer
> > to the ctxt, so the risk of double freeing in future is reduced.  The
> > new code in svc_xprt_release which releases both the ctxt and any
> > rq_deferred depends on this.
> > 
> 
> Thank you. Leaving stray pointers around like that is just asking for
> trouble.
> 
> > Fixes: 773f91b2cf3f ("SUNRPC: Fix NFSD's request deferral on RDMA transports")
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  include/linux/sunrpc/svc_rdma.h          |  2 +-
> >  include/linux/sunrpc/svc_xprt.h          |  2 +-
> >  net/sunrpc/svc_xprt.c                    | 21 ++++++++++++-----
> >  net/sunrpc/svcsock.c                     | 30 +++++++++++++-----------
> >  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 11 ++++-----
> >  net/sunrpc/xprtrdma/svc_rdma_transport.c |  2 +-
> >  6 files changed, 39 insertions(+), 29 deletions(-)
> > 
> > diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
> > index 24aa159d29a7..fbc4bd423b35 100644
> > --- a/include/linux/sunrpc/svc_rdma.h
> > +++ b/include/linux/sunrpc/svc_rdma.h
> > @@ -176,7 +176,7 @@ extern struct svc_rdma_recv_ctxt *
> >  extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
> >  				   struct svc_rdma_recv_ctxt *ctxt);
> >  extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
> > -extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
> > +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
> >  extern int svc_rdma_recvfrom(struct svc_rqst *);
> >  
> >  /* svc_rdma_rw.c */
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> > index 867479204840..6f4473ee68e1 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -23,7 +23,7 @@ struct svc_xprt_ops {
> >  	int		(*xpo_sendto)(struct svc_rqst *);
> >  	int		(*xpo_result_payload)(struct svc_rqst *, unsigned int,
> >  					      unsigned int);
> > -	void		(*xpo_release_rqst)(struct svc_rqst *);
> > +	void		(*xpo_release_ctxt)(struct svc_xprt *, void *);
> >  	void		(*xpo_detach)(struct svc_xprt *);
> >  	void		(*xpo_free)(struct svc_xprt *);
> >  	void		(*xpo_kill_temp_xprt)(struct svc_xprt *);
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index 5fd94f6bdc75..1e3bba433561 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -532,13 +532,21 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
> >  }
> >  EXPORT_SYMBOL_GPL(svc_reserve);
> >  
> > +static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
> > +{
> > +	if (dr)
> > +		xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
> > +	kfree(dr);
> 
> nit: might as well put the kfree inside the if block to avoid it in the
> common case of dr == NULL.

I did that at first.  Then I took it out of the if block - more vague
aesthetics than anything else.  I'm perfectly happy for it to go back in.

> 
> > +}
> > +
> >  static void svc_xprt_release(struct svc_rqst *rqstp)
> >  {
> >  	struct svc_xprt	*xprt = rqstp->rq_xprt;
> >  
> > -	xprt->xpt_ops->xpo_release_rqst(rqstp);
> > +	xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> > +	rqstp->rq_xprt_ctxt = NULL;
> >  
> > -	kfree(rqstp->rq_deferred);
> > +	free_deferred(xprt, rqstp->rq_deferred);
> >  	rqstp->rq_deferred = NULL;
> >  
> >  	svc_rqst_release_pages(rqstp);
> > @@ -1054,7 +1062,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
> >  	spin_unlock_bh(&serv->sv_lock);
> >  
> >  	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
> > -		kfree(dr);
> > +		free_deferred(xprt, dr);
> >  
> >  	call_xpt_users(xprt);
> >  	svc_xprt_put(xprt);
> > @@ -1176,8 +1184,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> >  	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
> >  		spin_unlock(&xprt->xpt_lock);
> >  		trace_svc_defer_drop(dr);
> > +		free_deferred(xprt, dr);
> >  		svc_xprt_put(xprt);
> > -		kfree(dr);
> >  		return;
> >  	}
> >  	dr->xprt = NULL;
> > @@ -1222,14 +1230,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> >  		dr->addrlen = rqstp->rq_addrlen;
> >  		dr->daddr = rqstp->rq_daddr;
> >  		dr->argslen = rqstp->rq_arg.len >> 2;
> > -		dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
> >  
> >  		/* back up head to the start of the buffer and copy */
> >  		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
> >  		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
> >  		       dr->argslen << 2);
> >  	}
> > -	WARN_ON_ONCE(rqstp->rq_xprt_ctxt != dr->xprt_ctxt);
> > +	dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
> >  	rqstp->rq_xprt_ctxt = NULL;
> >  	trace_svc_defer(rqstp);
> >  	svc_xprt_get(rqstp->rq_xprt);
> > @@ -1263,6 +1270,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
> >  	rqstp->rq_daddr       = dr->daddr;
> >  	rqstp->rq_respages    = rqstp->rq_pages;
> >  	rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;
> > +
> > +	dr->xprt_ctxt = NULL;
> >  	svc_xprt_received(rqstp->rq_xprt);
> >  	return dr->argslen << 2;
> >  }
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index a51c9b989d58..aa4f31a770e3 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -121,27 +121,27 @@ static void svc_reclassify_socket(struct socket *sock)
> >  #endif
> >  
> >  /**
> > - * svc_tcp_release_rqst - Release transport-related resources
> > - * @rqstp: request structure with resources to be released
> > + * svc_tcp_release_ctxt - Release transport-related resources
> > + * @xprt: the transport which owned the context
> > + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
> >   *
> >   */
> > -static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
> > +static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
> >  {
> >  }
> >  
> >  /**
> > - * svc_udp_release_rqst - Release transport-related resources
> > - * @rqstp: request structure with resources to be released
> > + * svc_udp_release_ctxt - Release transport-related resources
> > + * @xprt: the transport which owned the context
> > + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
> >   *
> >   */
> > -static void svc_udp_release_rqst(struct svc_rqst *rqstp)
> > +static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
> >  {
> > -	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
> > +	struct sk_buff *skb = ctxt;
> >  
> > -	if (skb) {
> > -		rqstp->rq_xprt_ctxt = NULL;
> > +	if (skb)
> >  		consume_skb(skb);
> > -	}
> >  }
> >  
> >  union svc_pktinfo_u {
> > @@ -696,7 +696,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
> >  	unsigned int sent;
> >  	int err;
> >  
> > -	svc_udp_release_rqst(rqstp);
> > +	svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> > +	rqstp->rq_xprt_ctxt = NULL;
> >  
> >  	svc_set_cmsg_data(rqstp, cmh);
> >  
> > @@ -768,7 +769,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
> >  	.xpo_recvfrom = svc_udp_recvfrom,
> >  	.xpo_sendto = svc_udp_sendto,
> >  	.xpo_result_payload = svc_sock_result_payload,
> > -	.xpo_release_rqst = svc_udp_release_rqst,
> > +	.xpo_release_ctxt = svc_udp_release_ctxt,
> >  	.xpo_detach = svc_sock_detach,
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_has_wspace = svc_udp_has_wspace,
> > @@ -1298,7 +1299,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
> >  	unsigned int sent;
> >  	int err;
> >  
> > -	svc_tcp_release_rqst(rqstp);
> > +	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> > +	rqstp->rq_xprt_ctxt = NULL;
> >  
> >  	atomic_inc(&svsk->sk_sendqlen);
> >  	mutex_lock(&xprt->xpt_mutex);
> > @@ -1343,7 +1345,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
> >  	.xpo_recvfrom = svc_tcp_recvfrom,
> >  	.xpo_sendto = svc_tcp_sendto,
> >  	.xpo_result_payload = svc_sock_result_payload,
> > -	.xpo_release_rqst = svc_tcp_release_rqst,
> > +	.xpo_release_ctxt = svc_tcp_release_ctxt,
> >  	.xpo_detach = svc_tcp_sock_detach,
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_has_wspace = svc_tcp_has_wspace,
> > diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > index 1c658fa43063..5c51e28b3111 100644
> > --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > @@ -239,21 +239,20 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
> >  }
> >  
> >  /**
> > - * svc_rdma_release_rqst - Release transport-specific per-rqst resources
> > - * @rqstp: svc_rqst being released
> > + * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
> > + * @xprt: the transport which owned the context
> > + * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
> >   *
> >   * Ensure that the recv_ctxt is released whether or not a Reply
> >   * was sent. For example, the client could close the connection,
> >   * or svc_process could drop an RPC, before the Reply is sent.
> >   */
> > -void svc_rdma_release_rqst(struct svc_rqst *rqstp)
> > +void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
> >  {
> > -	struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
> > -	struct svc_xprt *xprt = rqstp->rq_xprt;
> > +	struct svc_rdma_recv_ctxt *ctxt = vctxt;
> >  	struct svcxprt_rdma *rdma =
> >  		container_of(xprt, struct svcxprt_rdma, sc_xprt);
> >  
> > -	rqstp->rq_xprt_ctxt = NULL;
> >  	if (ctxt)
> >  		svc_rdma_recv_ctxt_put(rdma, ctxt);
> >  }
> > diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > index 416b298f74dd..ca04f7a6a085 100644
> > --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
> > @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
> >  	.xpo_recvfrom = svc_rdma_recvfrom,
> >  	.xpo_sendto = svc_rdma_sendto,
> >  	.xpo_result_payload = svc_rdma_result_payload,
> > -	.xpo_release_rqst = svc_rdma_release_rqst,
> > +	.xpo_release_ctxt = svc_rdma_release_ctxt,
> >  	.xpo_detach = svc_rdma_detach,
> >  	.xpo_free = svc_rdma_free,
> >  	.xpo_has_wspace = svc_rdma_has_wspace,
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Thanks,
NeilBrown

> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-05-09 20:52 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-08 23:41 [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use NeilBrown
2023-05-08 23:42 ` [PATCH 2/2] SUNRPC: always free ctxt when freeing deferred request NeilBrown
2023-05-09 14:11   ` Jeff Layton
2023-05-09 20:49     ` NeilBrown
2023-05-09 14:31   ` Chuck Lever III
2023-05-09 14:02 ` [PATCH 1/2] SUNRPC: double free xprt_ctxt while still in use Jeff Layton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.