From: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> To: bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org Subject: [PATCH v4 04/11] svcrdma: Improve allocation of struct svc_rdma_op_ctxt Date: Mon, 14 Dec 2015 16:30:34 -0500 [thread overview] Message-ID: <20151214213034.12932.3797.stgit@klimt.1015granger.net> (raw) In-Reply-To: <20151214211951.12932.99017.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org> When the maximum payload size of NFS READ and WRITE was increased by commit cc9a903d915c ("svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt increased to over 6KB (on x86_64). That makes allocating one of these from a kmem_cache more likely to fail in situations when system memory is exhausted. Since I'm about to add a caller where this allocation must always work _and_ it cannot sleep, pre-allocate ctxts for each connection. Another motivation for this change is that NFSv4.x servers are required by specification not to drop NFS requests. Pre-allocating memory resources reduces the likelihood of a drop. Signed-off-by: Chuck Lever <chuck.lever-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> --- include/linux/sunrpc/svc_rdma.h | 6 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 102 ++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f869807..be2804b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { + struct list_head free; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -141,7 +142,10 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_ctxt_lock; + struct list_head sc_ctxts; + int sc_ctxt_used; + struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0783f6e..58ed9f2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, + gfp_t flags) { struct svc_rdma_op_ctxt *ctxt; - ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, - GFP_KERNEL | __GFP_NOFAIL); - ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->dto_q); + ctxt = kmalloc(sizeof(*ctxt), flags); + if (ctxt) { + ctxt->xprt = xprt; + INIT_LIST_HEAD(&ctxt->free); + INIT_LIST_HEAD(&ctxt->dto_q); + } + return ctxt; +} + +static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) +{ + int i; + + /* Each RPC/RDMA credit can consume a number of send + * and receive WQEs. One ctxt is allocated for each. + */ + i = xprt->sc_sq_depth + xprt->sc_max_requests; + + while (i--) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = alloc_ctxt(xprt, GFP_KERNEL); + if (!ctxt) { + dprintk("svcrdma: No memory for RDMA ctxt\n"); + return false; + } + list_add(&ctxt->free, &xprt->sc_ctxts); + } + return true; +} + +struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_op_ctxt *ctxt = NULL; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used++; + if (list_empty(&xprt->sc_ctxts)) + goto out_empty; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del_init(&ctxt->free); + spin_unlock_bh(&xprt->sc_ctxt_lock); + +out: ctxt->count = 0; ctxt->frmr = NULL; - atomic_inc(&xprt->sc_ctxt_used); return ctxt; + +out_empty: + /* Either pre-allocation missed the mark, or send + * queue accounting is broken. + */ + spin_unlock_bh(&xprt->sc_ctxt_lock); + + ctxt = alloc_ctxt(xprt, GFP_NOIO); + if (ctxt) + goto out; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + spin_unlock_bh(&xprt->sc_ctxt_lock); + WARN_ON_ONCE("svcrdma: empty RDMA ctxt list?\n"); + return NULL; } void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) @@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) { - struct svcxprt_rdma *xprt; + struct svcxprt_rdma *xprt = ctxt->xprt; int i; - xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); - atomic_dec(&xprt->sc_ctxt_used); + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + list_add(&ctxt->free, &xprt->sc_ctxts); + spin_unlock_bh(&xprt->sc_ctxt_lock); +} + +static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) +{ + while (!list_empty(&xprt->sc_ctxts)) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del(&ctxt->free); + kfree(ctxt); + } } /* @@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); + INIT_LIST_HEAD(&cma_xprt->sc_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock); + spin_lock_init(&cma_xprt->sc_ctxt_lock); if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); @@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; + if (!svc_rdma_prealloc_ctxts(newxprt)) + goto errout; + /* * Limit ORD based on client limit, local device limit, and * configured svcrdma limit. @@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - if (atomic_read(&rdma->sc_ctxt_used) != 0) + if (rdma->sc_ctxt_used != 0) pr_err("svcrdma: ctxt still in use? (%d)\n", - atomic_read(&rdma->sc_ctxt_used)); + rdma->sc_ctxt_used); if (atomic_read(&rdma->sc_dma_used) != 0) pr_err("svcrdma: dma still in use? (%d)\n", atomic_read(&rdma->sc_dma_used)); - /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); + svc_rdma_destroy_ctxts(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html
WARNING: multiple messages have this Message-ID (diff)
From: Chuck Lever <chuck.lever@oracle.com> To: bfields@fieldses.org Cc: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org Subject: [PATCH v4 04/11] svcrdma: Improve allocation of struct svc_rdma_op_ctxt Date: Mon, 14 Dec 2015 16:30:34 -0500 [thread overview] Message-ID: <20151214213034.12932.3797.stgit@klimt.1015granger.net> (raw) In-Reply-To: <20151214211951.12932.99017.stgit@klimt.1015granger.net> When the maximum payload size of NFS READ and WRITE was increased by commit cc9a903d915c ("svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt increased to over 6KB (on x86_64). That makes allocating one of these from a kmem_cache more likely to fail in situations when system memory is exhausted. Since I'm about to add a caller where this allocation must always work _and_ it cannot sleep, pre-allocate ctxts for each connection. Another motivation for this change is that NFSv4.x servers are required by specification not to drop NFS requests. Pre-allocating memory resources reduces the likelihood of a drop. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> --- include/linux/sunrpc/svc_rdma.h | 6 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 102 ++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f869807..be2804b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { + struct list_head free; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -141,7 +142,10 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_ctxt_lock; + struct list_head sc_ctxts; + int sc_ctxt_used; + struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0783f6e..58ed9f2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, + gfp_t flags) { struct svc_rdma_op_ctxt *ctxt; - ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, - GFP_KERNEL | __GFP_NOFAIL); - ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->dto_q); + ctxt = kmalloc(sizeof(*ctxt), flags); + if (ctxt) { + ctxt->xprt = xprt; + INIT_LIST_HEAD(&ctxt->free); + INIT_LIST_HEAD(&ctxt->dto_q); + } + return ctxt; +} + +static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) +{ + int i; + + /* Each RPC/RDMA credit can consume a number of send + * and receive WQEs. One ctxt is allocated for each. + */ + i = xprt->sc_sq_depth + xprt->sc_max_requests; + + while (i--) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = alloc_ctxt(xprt, GFP_KERNEL); + if (!ctxt) { + dprintk("svcrdma: No memory for RDMA ctxt\n"); + return false; + } + list_add(&ctxt->free, &xprt->sc_ctxts); + } + return true; +} + +struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_op_ctxt *ctxt = NULL; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used++; + if (list_empty(&xprt->sc_ctxts)) + goto out_empty; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del_init(&ctxt->free); + spin_unlock_bh(&xprt->sc_ctxt_lock); + +out: ctxt->count = 0; ctxt->frmr = NULL; - atomic_inc(&xprt->sc_ctxt_used); return ctxt; + +out_empty: + /* Either pre-allocation missed the mark, or send + * queue accounting is broken. + */ + spin_unlock_bh(&xprt->sc_ctxt_lock); + + ctxt = alloc_ctxt(xprt, GFP_NOIO); + if (ctxt) + goto out; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + spin_unlock_bh(&xprt->sc_ctxt_lock); + WARN_ON_ONCE("svcrdma: empty RDMA ctxt list?\n"); + return NULL; } void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) @@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) { - struct svcxprt_rdma *xprt; + struct svcxprt_rdma *xprt = ctxt->xprt; int i; - xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); - atomic_dec(&xprt->sc_ctxt_used); + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + list_add(&ctxt->free, &xprt->sc_ctxts); + spin_unlock_bh(&xprt->sc_ctxt_lock); +} + +static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) +{ + while (!list_empty(&xprt->sc_ctxts)) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del(&ctxt->free); + kfree(ctxt); + } } /* @@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); + INIT_LIST_HEAD(&cma_xprt->sc_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock); + spin_lock_init(&cma_xprt->sc_ctxt_lock); if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); @@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; + if (!svc_rdma_prealloc_ctxts(newxprt)) + goto errout; + /* * Limit ORD based on client limit, local device limit, and * configured svcrdma limit. @@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - if (atomic_read(&rdma->sc_ctxt_used) != 0) + if (rdma->sc_ctxt_used != 0) pr_err("svcrdma: ctxt still in use? (%d)\n", - atomic_read(&rdma->sc_ctxt_used)); + rdma->sc_ctxt_used); if (atomic_read(&rdma->sc_dma_used) != 0) pr_err("svcrdma: dma still in use? (%d)\n", atomic_read(&rdma->sc_dma_used)); - /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); + svc_rdma_destroy_ctxts(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
next prev parent reply other threads:[~2015-12-14 21:30 UTC|newest] Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top 2015-12-14 21:30 [PATCH v4 00/11] NFS/RDMA server patches for v4.5 Chuck Lever 2015-12-14 21:30 ` Chuck Lever [not found] ` <20151214211951.12932.99017.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org> 2015-12-14 21:30 ` [PATCH v4 01/11] svcrdma: Do not send XDR roundup bytes for a write chunk Chuck Lever 2015-12-14 21:30 ` Chuck Lever [not found] ` <20151214213009.12932.60521.stgit-Hs+gFlyCn65vLzlybtyyYzGyq/o6K9yX@public.gmane.org> 2015-12-21 21:07 ` J. Bruce Fields 2015-12-21 21:07 ` J. Bruce Fields [not found] ` <20151221210708.GD7869-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org> 2015-12-21 21:15 ` Chuck Lever 2015-12-21 21:15 ` Chuck Lever [not found] ` <D0AD304E-0356-48DE-A68D-F9F8930D854D-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> 2015-12-21 21:29 ` J. Bruce Fields 2015-12-21 21:29 ` J. Bruce Fields [not found] ` <20151221212959.GE7869-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org> 2015-12-21 22:11 ` Chuck Lever 2015-12-21 22:11 ` Chuck Lever [not found] ` <DF5B7D29-0C6C-47EF-8E3E-74BF137D7F95-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> 2015-12-23 19:59 ` J. Bruce Fields 2015-12-23 19:59 ` J. Bruce Fields 2015-12-14 21:30 ` [PATCH v4 02/11] svcrdma: Clean up rdma_create_xprt() Chuck Lever 2015-12-14 21:30 ` Chuck Lever 2015-12-14 21:30 ` [PATCH v4 03/11] svcrdma: Clean up process_context() Chuck Lever 2015-12-14 21:30 ` Chuck Lever 2015-12-14 21:30 ` Chuck Lever [this message] 2015-12-14 21:30 ` [PATCH v4 04/11] svcrdma: Improve allocation of struct svc_rdma_op_ctxt Chuck Lever 2015-12-14 21:30 ` [PATCH v4 05/11] svcrdma: Improve allocation of struct svc_rdma_req_map Chuck Lever 2015-12-14 21:30 ` Chuck Lever 2015-12-14 21:30 ` [PATCH v4 06/11] svcrdma: Remove unused req_map and ctxt kmem_caches Chuck Lever 2015-12-14 21:30 ` Chuck Lever 2015-12-14 21:30 ` [PATCH v4 07/11] svcrdma: Add gfp flags to svc_rdma_post_recv() Chuck Lever 2015-12-14 21:30 ` Chuck Lever 2015-12-14 21:31 ` [PATCH v4 08/11] svcrdma: Remove last two __GFP_NOFAIL call sites Chuck Lever 2015-12-14 21:31 ` Chuck Lever 2015-12-14 21:31 ` [PATCH v4 09/11] svcrdma: Make map_xdr non-static Chuck Lever 2015-12-14 21:31 ` Chuck Lever 2015-12-14 21:31 ` [PATCH v4 10/11] svcrdma: Define maximum number of backchannel requests Chuck Lever 2015-12-14 21:31 ` Chuck Lever 2015-12-14 21:31 ` [PATCH v4 11/11] svcrdma: Add class for RDMA backwards direction transport Chuck Lever 2015-12-14 21:31 ` Chuck Lever 2015-12-16 12:10 ` [PATCH v4 00/11] NFS/RDMA server patches for v4.5 Devesh Sharma 2015-12-16 12:10 ` Devesh Sharma [not found] ` <CANjDDBjypu0jX22fZ-Nf-bNNeam2MM0MADaZY9C-o9ihJPiseg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2015-12-23 21:00 ` J. Bruce Fields 2015-12-23 21:00 ` J. Bruce Fields [not found] ` <20151223210015.GA29650-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org> 2015-12-24 9:57 ` Chuck Lever 2015-12-24 9:57 ` Chuck Lever
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20151214213034.12932.3797.stgit@klimt.1015granger.net \ --to=chuck.lever-qhclzuegtsvqt0dzr+alfa@public.gmane.org \ --cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \ --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \ --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.