All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nagendra Tomar <Nagendra.Tomar@microsoft.com>
To: "linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>,
	"anna.schumaker@netapp.com" <anna.schumaker@netapp.com>
Subject: [PATCH 2/5] nfs: Add mount option for forcing RPC requests to one file over one connection
Date: Tue, 23 Mar 2021 05:48:42 +0000	[thread overview]
Message-ID: <SG2P153MB0361FD1C5138A0C1FCD7DB719E649@SG2P153MB0361.APCP153.PROD.OUTLOOK.COM> (raw)

From: Nagendra S Tomar <natomar@microsoft.com>

Add a new mount option ncpolicy=roundrobin|hash which allows the user to
select the nconnect policy for the given mount. Defaults to roundrobin.
The user-selected policy is stored in the nfs_client structure and
passed down to the RPC client (struct rpc_clnt), where the transport
selection is done accordingly.
Also add a new function pointer p_fhhash to struct rpc_procinfo.
A procedure can supply it to return the hash of the target file's
filehandle for the given RPC, which is then used to bind all RPCs for
a file to one xprt.

Signed-off-by: Nagendra S Tomar <natomar@microsoft.com>
---
 fs/nfs/client.c             |  3 +++
 fs/nfs/fs_context.c         | 26 ++++++++++++++++++++++++++
 fs/nfs/internal.h           |  2 ++
 fs/nfs/nfs3client.c         |  4 +++-
 fs/nfs/nfs4client.c         | 14 +++++++++++---
 fs/nfs/super.c              |  7 ++++++-
 include/linux/nfs_fs_sb.h   |  1 +
 include/linux/sunrpc/clnt.h | 15 +++++++++++++++
 net/sunrpc/clnt.c           | 34 ++++++++++++++++++++++++++++------
 9 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ff5c4d0d6d13..5c2809d8368a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 
 	clp->cl_proto = cl_init->proto;
 	clp->cl_nconnect = cl_init->nconnect;
+	clp->cl_ncpolicy = cl_init->ncpolicy;
 	clp->cl_net = get_net(cl_init->net);
 
 	clp->cl_principal = "*";
@@ -506,6 +507,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 		.net		= clp->cl_net,
 		.protocol	= clp->cl_proto,
 		.nconnect	= clp->cl_nconnect,
+		.ncpolicy	= clp->cl_ncpolicy,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
 		.timeout	= cl_init->timeparms,
@@ -678,6 +680,7 @@ static int nfs_init_server(struct nfs_server *server,
 		.timeparms = &timeparms,
 		.cred = server->cred,
 		.nconnect = ctx->nfs_server.nconnect,
+		.ncpolicy = ctx->nfs_server.ncpolicy,
 		.init_flags = (1UL << NFS_CS_REUSEPORT),
 	};
 	struct nfs_client *clp;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 971a9251c1d9..7bb8f1c8356f 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -60,6 +60,7 @@ enum nfs_param {
 	Opt_mountvers,
 	Opt_namelen,
 	Opt_nconnect,
+	Opt_ncpolicy,
 	Opt_port,
 	Opt_posix,
 	Opt_proto,
@@ -127,6 +128,18 @@ static const struct constant_table nfs_param_enums_write[] = {
 	{}
 };
 
+enum {
+	Opt_ncpolicy_roundrobin,
+	Opt_ncpolicy_hash,
+};
+
+static const struct constant_table nfs_param_enums_ncpolicy[] = {
+	{ "hash",		Opt_ncpolicy_hash },
+	{ "roundrobin",		Opt_ncpolicy_roundrobin },
+	{ "rr",			Opt_ncpolicy_roundrobin },
+	{}
+};
+
 static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_flag_no("ac",		Opt_ac),
 	fsparam_u32   ("acdirmax",	Opt_acdirmax),
@@ -158,6 +171,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_u32   ("mountvers",	Opt_mountvers),
 	fsparam_u32   ("namlen",	Opt_namelen),
 	fsparam_u32   ("nconnect",	Opt_nconnect),
+	fsparam_enum  ("ncpolicy",	Opt_ncpolicy, nfs_param_enums_ncpolicy),
 	fsparam_string("nfsvers",	Opt_vers),
 	fsparam_u32   ("port",		Opt_port),
 	fsparam_flag_no("posix",	Opt_posix),
@@ -749,6 +763,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 			goto out_of_bounds;
 		ctx->nfs_server.nconnect = result.uint_32;
 		break;
+	case Opt_ncpolicy:
+		switch (result.uint_32) {
+		case Opt_ncpolicy_roundrobin:
+			ctx->nfs_server.ncpolicy = ncpolicy_roundrobin;
+			break;
+		case Opt_ncpolicy_hash:
+			ctx->nfs_server.ncpolicy = ncpolicy_hash;
+			break;
+		default:
+			goto out_invalid_value;
+		}
+		break;
 	case Opt_lookupcache:
 		switch (result.uint_32) {
 		case Opt_lookupcache_all:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b644d6c09e4..e6ca664d7e91 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -67,6 +67,7 @@ struct nfs_client_initdata {
 	int proto;
 	u32 minorversion;
 	unsigned int nconnect;
+	enum ncpolicy ncpolicy;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
 	const struct cred *cred;
@@ -120,6 +121,7 @@ struct nfs_fs_context {
 		int			port;
 		unsigned short		protocol;
 		unsigned short		nconnect;
+		enum ncpolicy		ncpolicy;
 		unsigned short		export_path_len;
 	} nfs_server;
 
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 5601e47360c2..f8a648f7492a 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -102,8 +102,10 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 889a9f4c0310..c967c214129a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		const char *ip_addr,
 		int proto, const struct rpc_timeout *timeparms,
 		u32 minorversion, unsigned int nconnect,
+		enum ncpolicy ncpolicy,
 		struct net *net)
 {
 	struct nfs_client_initdata cl_init = {
@@ -881,8 +882,10 @@ static int nfs4_set_client(struct nfs_server *server,
 
 	if (minorversion == 0)
 		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
-	if (proto == XPRT_TRANSPORT_TCP)
+	if (proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = nconnect;
+		cl_init.ncpolicy = ncpolicy;
+	}
 
 	if (server->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
 				&timeparms,
 				ctx->minorversion,
 				ctx->nfs_server.nconnect,
+				ctx->nfs_server.ncpolicy,
 				fc->net_ns);
 	if (error < 0)
 		return error;
@@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_ncpolicy,
 				parent_client->cl_net);
 	if (!error)
 		goto init_server;
@@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_ncpolicy,
 				parent_client->cl_net);
 	if (error < 0)
 		goto error;
@@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion,
-				clp->cl_nconnect, net);
+				clp->cl_nconnect, clp->cl_ncpolicy, net);
 	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 94885c6f8f54..8719be70051b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -481,8 +481,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	seq_printf(m, ",proto=%s",
 		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
 	rcu_read_unlock();
-	if (clp->cl_nconnect > 0)
+	if (clp->cl_nconnect > 0) {
 		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
+		if (clp->cl_ncpolicy == ncpolicy_roundrobin)
+			seq_puts(m, ",ncpolicy=roundrobin");
+		else if (clp->cl_ncpolicy == ncpolicy_hash)
+			seq_puts(m, ",ncpolicy=hash");
+	}
 	if (version == 4) {
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6f76b32a0238..737f4d231e23 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -62,6 +62,7 @@ struct nfs_client {
 
 	u32			cl_minorversion;/* NFSv4 minorversion */
 	unsigned int		cl_nconnect;	/* Number of connections */
+	enum ncpolicy		cl_ncpolicy;	/* nconnect policy */
 	const char *		cl_principal;  /* used for machine cred */
 
 #if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 02e7a5863d28..aa1c1706f4d5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -28,6 +28,15 @@
 #include <net/ipv6.h>
 #include <linux/sunrpc/xprtmultipath.h>
 
+/*
+ * Policies for controlling distribution of RPC requests over multiple
+ * nconnect connections.
+ */
+enum ncpolicy {
+	ncpolicy_roundrobin,	// Select roundrobin.
+	ncpolicy_hash,		// Select based on target filehandle hash.
+};
+
 struct rpc_inode;
 
 /*
@@ -40,6 +49,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
+	enum ncpolicy		cl_ncpolicy;	/* nconnect policy */
 	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
@@ -101,6 +111,8 @@ struct rpc_version {
 	unsigned int		*counts;	/* call counts */
 };
 
+typedef u32 (*getfhhash_t)(const void *obj);
+
 /*
  * Procedure information
  */
@@ -108,6 +120,7 @@ struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
 	kxdreproc_t		p_encode;	/* XDR encode function */
 	kxdrdproc_t		p_decode;	/* XDR decode function */
+	getfhhash_t		p_fhhash;	/* Returns target fh hash */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_timer;	/* Which RTT timer to use */
@@ -129,6 +142,7 @@ struct rpc_create_args {
 	u32			version;
 	rpc_authflavor_t	authflavor;
 	u32			nconnect;
+	enum ncpolicy		ncpolicy;
 	unsigned long		flags;
 	char			*client_name;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
@@ -247,4 +261,5 @@ static inline void rpc_task_close_connection(struct rpc_task *task)
 	if (task->tk_xprt)
 		xprt_force_disconnect(task->tk_xprt);
 }
+
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1b2a02460601..ed470a75e91d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -410,6 +410,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	}
 
 	rpc_clnt_set_transport(clnt, xprt, timeout);
+	clnt->cl_ncpolicy = args->ncpolicy;
 	xprt_iter_init(&clnt->cl_xpi, xps);
 	xprt_switch_put(xps);
 
@@ -640,6 +641,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
 	new->cl_discrtry = clnt->cl_discrtry;
 	new->cl_chatty = clnt->cl_chatty;
 	new->cl_principal = clnt->cl_principal;
+	new->cl_ncpolicy = clnt->cl_ncpolicy;
 	return new;
 
 out_err:
@@ -1053,9 +1055,10 @@ rpc_task_get_first_xprt(struct rpc_clnt *clnt)
 }
 
 static struct rpc_xprt *
-rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+rpc_task_get_next_xprt(struct rpc_clnt *clnt, u32 hash)
 {
-	return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi, 0));
+	return rpc_task_get_xprt(clnt,
+			xprt_iter_get_next(&clnt->cl_xpi, hash));
 }
 
 static
@@ -1065,8 +1068,16 @@ void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
 		return;
 	if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
 		task->tk_xprt = rpc_task_get_first_xprt(clnt);
-	else
-		task->tk_xprt = rpc_task_get_next_xprt(clnt);
+	else {
+		u32 xprt_hint = 0;
+
+		if (clnt->cl_ncpolicy == ncpolicy_hash &&
+		    task->tk_msg.rpc_proc->p_fhhash) {
+			xprt_hint = task->tk_msg.rpc_proc->p_fhhash(
+						task->tk_msg.rpc_argp);
+		}
+		task->tk_xprt = rpc_task_get_next_xprt(clnt, xprt_hint);
+	}
 }
 
 static
@@ -1130,8 +1141,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	if (!RPC_IS_ASYNC(task))
 		task->tk_flags |= RPC_TASK_CRED_NOREF;
 
-	rpc_task_set_client(task, task_setup_data->rpc_client);
 	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
+	rpc_task_set_client(task, task_setup_data->rpc_client);
 
 	if (task->tk_action == NULL)
 		rpc_call_start(task);
@@ -1636,6 +1647,7 @@ call_start(struct rpc_task *task)
 	/* Increment call count (version might not be valid for ping) */
 	if (clnt->cl_program->version[clnt->cl_vers])
 		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
+
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 	rpc_task_set_transport(task, clnt);
@@ -2888,7 +2900,17 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 				connect_timeout,
 				reconnect_timeout);
 
-	rpc_xprt_switch_set_roundrobin(xps);
+	switch (clnt->cl_ncpolicy) {
+	case ncpolicy_roundrobin:
+	default:
+		WARN_ON(clnt->cl_ncpolicy != ncpolicy_roundrobin);
+		rpc_xprt_switch_set_roundrobin(xps);
+		break;
+	case ncpolicy_hash:
+		rpc_xprt_switch_set_hash(xps);
+		break;
+	}
+
 	if (setup) {
 		ret = setup(clnt, xps, xprt, data);
 		if (ret != 0)

                 reply	other threads:[~2021-03-23  5:49 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SG2P153MB0361FD1C5138A0C1FCD7DB719E649@SG2P153MB0361.APCP153.PROD.OUTLOOK.COM \
    --to=nagendra.tomar@microsoft.com \
    --cc=anna.schumaker@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.