From: "J. Bruce Fields" <bfields@fieldses.org>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>,
	Neil Brown <neilb@suse.de>, David Miller <davem@davemloft.net>,
	linux-nfs@vger.kernel.org, netdev <netdev@vger.kernel.org>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] sunrpc: use better NUMA affinities
Date: Fri, 5 Aug 2011 17:28:43 -0400
Message-ID: <20110805212843.GA21997@fieldses.org>
In-Reply-To: <1311876249.2346.39.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Thu, Jul 28, 2011 at 08:04:09PM +0200, Eric Dumazet wrote:
> Use NUMA aware allocations to reduce latencies and increase throughput.
> 
> sunrpc kthreads can use kthread_create_on_node() if pool_mode is
> "percpu" or "pernode", and svc_prepare_thread()/svc_init_buffer() can
> also take into account NUMA node affinity for memory allocations.
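
(Background: "pool_mode" here is the sunrpc.pool_mode module parameter,
one of "auto", "global", "percpu" or "pernode"; it can be set with
sunrpc.pool_mode=pernode on the kernel command line, or written to
/sys/module/sunrpc/parameters/pool_mode before the server threads are
started.  Only the percpu and pernode layouts give a pool a
well-defined NUMA node, which is what the new svc_pool_map_get_node()
below relies on.)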

By the way, thanks, applying for 3.2 with one minor fixup below. --b.

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index ce620b5..516f337 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -199,7 +199,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 	INIT_LIST_HEAD(&serv->sv_cb_list);
 	spin_lock_init(&serv->sv_cb_lock);
 	init_waitqueue_head(&serv->sv_cb_waitq);
-	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
 	if (IS_ERR(rqstp)) {
 		svc_xprt_put(serv->sv_bc_xprt);
 		serv->sv_bc_xprt = NULL;
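
(NUMA_NO_NODE is the right value for this back-channel thread, which is
not tied to any pool: the node-aware allocators treat a negative node
id as "no preference" and fall back to the current node, roughly as in
that era's include/linux/gfp.h:

	static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						    unsigned int order)
	{
		/* Unknown node is current node */
		if (nid < 0)
			nid = numa_node_id();

		return __alloc_pages(gfp_mask, order,
				     node_zonelist(nid, gfp_mask));
	}

so the fixup keeps the old, node-agnostic behaviour here.)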

> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: "J. Bruce Fields" <bfields@fieldses.org>
> CC: Neil Brown <neilb@suse.de>
> CC: David Miller <davem@davemloft.net>
> ---
>  fs/lockd/svc.c             |    2 +-
>  fs/nfs/callback.c          |    2 +-
>  include/linux/sunrpc/svc.h |    2 +-
>  net/sunrpc/svc.c           |   33 ++++++++++++++++++++++++---------
>  4 files changed, 27 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index abfff9d..c061b9a 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -282,7 +282,7 @@ int lockd_up(void)
>  	/*
>  	 * Create the kernel thread and wait for it to start.
>  	 */
> -	nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
> +	nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
>  	if (IS_ERR(nlmsvc_rqst)) {
>  		error = PTR_ERR(nlmsvc_rqst);
>  		nlmsvc_rqst = NULL;
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index e3d2942..ce620b5 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -125,7 +125,7 @@ nfs4_callback_up(struct svc_serv *serv)
>  	else
>  		goto out_err;
>  
> -	return svc_prepare_thread(serv, &serv->sv_pools[0]);
> +	return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
>  
>  out_err:
>  	if (ret == 0)
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 223588a..a78a51e 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -404,7 +404,7 @@ struct svc_procedure {
>  struct svc_serv *svc_create(struct svc_program *, unsigned int,
>  			    void (*shutdown)(struct svc_serv *));
>  struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
> -					struct svc_pool *pool);
> +					struct svc_pool *pool, int node);
>  void		   svc_exit_thread(struct svc_rqst *);
>  struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
>  			void (*shutdown)(struct svc_serv *),
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 6a69a11..30d70ab 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -295,6 +295,18 @@ svc_pool_map_put(void)
>  }
>  
>  
> +static int svc_pool_map_get_node(unsigned int pidx)
> +{
> +	const struct svc_pool_map *m = &svc_pool_map;
> +
> +	if (m->count) {
> +		if (m->mode == SVC_POOL_PERCPU)
> +			return cpu_to_node(m->pool_to[pidx]);
> +		if (m->mode == SVC_POOL_PERNODE)
> +			return m->pool_to[pidx];
> +	}
> +	return NUMA_NO_NODE;
> +}
>  /*
>   * Set the given thread's cpus_allowed mask so that it
>   * will only run on cpus in the given pool.
> @@ -499,7 +511,7 @@ EXPORT_SYMBOL_GPL(svc_destroy);
>   * We allocate pages and place them in rq_argpages.
>   */
>  static int
> -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
> +svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
>  {
>  	unsigned int pages, arghi;
>  
> @@ -513,7 +525,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
>  	arghi = 0;
>  	BUG_ON(pages > RPCSVC_MAXPAGES);
>  	while (pages) {
> -		struct page *p = alloc_page(GFP_KERNEL);
> +		struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
>  		if (!p)
>  			break;
>  		rqstp->rq_pages[arghi++] = p;
> @@ -536,11 +548,11 @@ svc_release_buffer(struct svc_rqst *rqstp)
>  }
>  
>  struct svc_rqst *
> -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
> +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>  {
>  	struct svc_rqst	*rqstp;
>  
> -	rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
> +	rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node);
>  	if (!rqstp)
>  		goto out_enomem;
>  
> @@ -554,15 +566,15 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
>  	rqstp->rq_server = serv;
>  	rqstp->rq_pool = pool;
>  
> -	rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
> +	rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
>  	if (!rqstp->rq_argp)
>  		goto out_thread;
>  
> -	rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
> +	rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
>  	if (!rqstp->rq_resp)
>  		goto out_thread;
>  
> -	if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
> +	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>  		goto out_thread;
>  
>  	return rqstp;
> @@ -647,6 +659,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  	struct svc_pool *chosen_pool;
>  	int error = 0;
>  	unsigned int state = serv->sv_nrthreads-1;
> +	int node;
>  
>  	if (pool == NULL) {
>  		/* The -1 assumes caller has done a svc_get() */
> @@ -662,14 +675,16 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  		nrservs--;
>  		chosen_pool = choose_pool(serv, pool, &state);
>  
> -		rqstp = svc_prepare_thread(serv, chosen_pool);
> +		node = svc_pool_map_get_node(chosen_pool->sp_id);
> +		rqstp = svc_prepare_thread(serv, chosen_pool, node);
>  		if (IS_ERR(rqstp)) {
>  			error = PTR_ERR(rqstp);
>  			break;
>  		}
>  
>  		__module_get(serv->sv_module);
> -		task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
> +		task = kthread_create_on_node(serv->sv_function, rqstp,
> +					      node, serv->sv_name);
>  		if (IS_ERR(task)) {
>  			error = PTR_ERR(task);
>  			module_put(serv->sv_module);
> 
> 
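
(The point of the last hunk is that kthread_create_on_node() lets
kthreadd allocate the new thread's task_struct and kernel stack on the
chosen node, so with a percpu or pernode pool map each nfsd thread ends
up with node-local stack, rqstp, XDR buffers and page array.  A
condensed sketch of the resulting per-thread path, assuming the pool
map has been initialized:

	node  = svc_pool_map_get_node(chosen_pool->sp_id);
	rqstp = svc_prepare_thread(serv, chosen_pool, node);
	if (!IS_ERR(rqstp))
		task = kthread_create_on_node(serv->sv_function, rqstp,
					      node, serv->sv_name);

With a "global" pool map, svc_pool_map_get_node() returns NUMA_NO_NODE
and the path above degrades gracefully to the old behaviour.)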
