linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Fix up a couple of issues around layout handling
@ 2017-05-02 16:38 Trond Myklebust
  2017-05-02 16:38 ` [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

The main issue to be dealt with is a deadlock that can occur due to
an ABBA-type of situation between layoutget and layoutreturn.

Trond Myklebust (3):
  pNFS: Don't clear the layout return info if there are segments to
    return
  pNFS: Fix a deadlock when coalescing writes and returning the layout
  pNFS: Fix a typo in pnfs_generic_alloc_ds_commits

 fs/nfs/pnfs.c     | 10 +++++++---
 fs/nfs/pnfs_nfs.c |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

-- 
2.9.3


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections
  2017-05-02 16:38 [PATCH 0/3] Fix up a couple of issues around layout handling Trond Myklebust
@ 2017-05-02 16:38 ` Trond Myklebust
  2017-05-02 16:38   ` [PATCH 1/3] pNFS: Don't clear the layout return info if there are segments to return Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

Add an argument to struct rpc_create_args that allows the specification
of how many transport connections you want to set up to the server.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 17 ++++++++++++++++-
 net/sunrpc/xprtmultipath.c  |  3 +--
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0dde..8c3cb38a385b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -120,6 +120,7 @@ struct rpc_create_args {
 	u32			prognumber;	/* overrides program->number */
 	u32			version;
 	rpc_authflavor_t	authflavor;
+	u32			nconnect;
 	unsigned long		flags;
 	char			*client_name;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 673046c64e48..0ff97288b43f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -522,6 +522,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		.bc_xprt = args->bc_xprt,
 	};
 	char servername[48];
+	struct rpc_clnt *clnt;
+	int i;
 
 	if (args->bc_xprt) {
 		WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
@@ -584,7 +586,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	return rpc_create_xprt(args, xprt);
+	clnt = rpc_create_xprt(args, xprt);
+	if (IS_ERR(clnt) || args->nconnect <= 1)
+		return clnt;
+
+	for (i = 0; i < args->nconnect - 1; i++) {
+		if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
+			break;
+	}
+	return clnt;
 }
 EXPORT_SYMBOL_GPL(rpc_create);
 
@@ -2605,6 +2615,10 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
 		return -ENOMEM;
 	data->xps = xprt_switch_get(xps);
 	data->xprt = xprt_get(xprt);
+	if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) {
+		rpc_cb_add_xprt_release(data);
+		goto success;
+	}
 
 	cred = authnull_ops.lookup_cred(NULL, NULL, 0);
 	task = rpc_call_null_helper(clnt, xprt, cred,
@@ -2614,6 +2628,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	rpc_put_task(task);
+success:
 	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 95064d510ce6..486819d0c58b 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -51,8 +51,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
 	if (xprt == NULL)
 		return;
 	spin_lock(&xps->xps_lock);
-	if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
-	    !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+	if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
 		xprt_switch_add_xprt_locked(xps, xprt);
 	spin_unlock(&xps->xps_lock);
 }
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/3] pNFS: Don't clear the layout return info if there are segments to return
  2017-05-02 16:38 ` [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections Trond Myklebust
@ 2017-05-02 16:38   ` Trond Myklebust
  2017-05-02 16:38     ` [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

In pnfs_clear_layoutreturn_info, ensure that we don't clear the layout
return info if there are new segments queued for return due to, for
instance, a race between a LAYOUTRETURN and a failed I/O attempt.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 140ecd7d350f..cea1e838efae 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
 static void
 pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
 {
+	struct pnfs_layout_segment *lseg;
 	lo->plh_return_iomode = 0;
 	lo->plh_return_seq = 0;
 	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+			continue;
+		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+	}
 }
 
 static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
@@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
 	struct pnfs_layout_segment *lseg, *next;
 
 	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-	pnfs_clear_layoutreturn_info(lo);
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
 		pnfs_clear_lseg_state(lseg, lseg_list);
+	pnfs_clear_layoutreturn_info(lo);
 	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
 	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-02 16:38   ` [PATCH 1/3] pNFS: Don't clear the layout return info if there are segments to return Trond Myklebust
@ 2017-05-02 16:38     ` Trond Myklebust
  2017-05-02 16:38       ` [PATCH 2/3] pNFS: Fix a deadlock when coalescing writes and returning the layout Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

Allow the user to specify that the client should use multiple connections
to the server. For the moment, this functionality will be limited to
TCP and to NFSv4.x (x>0).

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h |  1 +
 fs/nfs/super.c    | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31b26cf1b476..31757a742e9b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -117,6 +117,7 @@ struct nfs_parsed_mount_data {
 		char			*export_path;
 		int			port;
 		unsigned short		protocol;
+		unsigned short		nconnect;
 	} nfs_server;
 
 	struct security_mnt_opts lsm_opts;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 54e0f9f2dd94..7eb48934dc79 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -76,6 +76,8 @@
 #define NFS_DEFAULT_VERSION 2
 #endif
 
+#define NFS_MAX_CONNECTIONS 16
+
 enum {
 	/* Mount options that take no arguments */
 	Opt_soft, Opt_hard,
@@ -107,6 +109,7 @@ enum {
 	Opt_nfsvers,
 	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
 	Opt_addr, Opt_mountaddr, Opt_clientaddr,
+	Opt_nconnect,
 	Opt_lookupcache,
 	Opt_fscache_uniq,
 	Opt_local_lock,
@@ -179,6 +182,8 @@ static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_mounthost, "mounthost=%s" },
 	{ Opt_mountaddr, "mountaddr=%s" },
 
+	{ Opt_nconnect, "nconnect=%s" },
+
 	{ Opt_lookupcache, "lookupcache=%s" },
 	{ Opt_fscache_uniq, "fsc=%s" },
 	{ Opt_local_lock, "local_lock=%s" },
@@ -1544,6 +1549,11 @@ static int nfs_parse_mount_options(char *raw,
 			if (mnt->mount_server.addrlen == 0)
 				goto out_invalid_address;
 			break;
+		case Opt_nconnect:
+			if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
+				goto out_invalid_value;
+			mnt->nfs_server.nconnect = option;
+			break;
 		case Opt_lookupcache:
 			string = match_strdup(args);
 			if (string == NULL)
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] pNFS: Fix a deadlock when coalescing writes and returning the layout
  2017-05-02 16:38     ` [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use Trond Myklebust
@ 2017-05-02 16:38       ` Trond Myklebust
  2017-05-02 16:38         ` [RFC PATCH 3/5] NFSv4: Allow multiple connections to NFSv4.x (x>0) servers Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

Consider the following deadlock:

Process P1	Process P2		Process P3
==========	==========		==========
					lock_page(page)

		lseg = pnfs_update_layout(inode)

lo = NFS_I(inode)->layout
pnfs_error_mark_layout_for_return(lo)

		lock_page(page)

					lseg = pnfs_update_layout(inode)

In this scenario,
- P1 has declared the layout to be in error, but P2 holds a reference to
  a layout segment on that inode, so the layoutreturn is deferred.
- P2 is waiting for a page lock held by P3.
- P3 is asking for a new layout segment, but is blocked waiting
  for the layoutreturn.

The fix is to ensure that pnfs_error_mark_layout_for_return() does
not set the NFS_LAYOUT_RETURN flag, which blocks P3. Instead, we allow
the latter to call LAYOUTGET so that it can make progress and unblock
P2.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cea1e838efae..adc6ec28d4b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2063,8 +2063,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 		return;
 	}
 	pnfs_set_plh_return_info(lo, range.iomode, 0);
-	/* Block LAYOUTGET */
-	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
 	/*
 	 * mark all matching lsegs so that we are sure to have no live
 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RFC PATCH 3/5] NFSv4: Allow multiple connections to NFSv4.x (x>0) servers
  2017-05-02 16:38       ` [PATCH 2/3] pNFS: Fix a deadlock when coalescing writes and returning the layout Trond Myklebust
@ 2017-05-02 16:38         ` Trond Myklebust
  2017-05-02 16:38           ` [PATCH 3/3] pNFS: Fix a typo in pnfs_generic_alloc_ds_commits Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

If the user specifies the -o nconnect=<number> mount option, and the transport
protocol is TCP, then set up <number> connections to the server. The
connections will all go to the same IP address.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/client.c           |  2 ++
 fs/nfs/internal.h         |  1 +
 fs/nfs/nfs4client.c       | 10 ++++++++--
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e0302101e18a..c5b0f3e270a3 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -180,6 +180,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
 	clp->cl_proto = cl_init->proto;
+	clp->cl_nconnect = cl_init->nconnect;
 	clp->cl_net = get_net(cl_init->net);
 
 	cred = rpc_lookup_machine_cred("*");
@@ -488,6 +489,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 	struct rpc_create_args args = {
 		.net		= clp->cl_net,
 		.protocol	= clp->cl_proto,
+		.nconnect	= clp->cl_nconnect,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
 		.timeout	= cl_init->timeparms,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31757a742e9b..abe5d3934eaf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -77,6 +77,7 @@ struct nfs_client_initdata {
 	struct nfs_subversion *nfs_mod;
 	int proto;
 	u32 minorversion;
+	unsigned int nconnect;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
 };
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 692a7a8bfc7a..c9b10b7829f0 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -834,7 +834,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		const size_t addrlen,
 		const char *ip_addr,
 		int proto, const struct rpc_timeout *timeparms,
-		u32 minorversion, struct net *net)
+		u32 minorversion, unsigned int nconnect,
+		struct net *net)
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
@@ -849,6 +850,8 @@ static int nfs4_set_client(struct nfs_server *server,
 	};
 	struct nfs_client *clp;
 
+	if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = nconnect;
 	if (server->flags & NFS_MOUNT_NORESVPORT)
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 	if (server->options & NFS_OPTION_MIGRATION)
@@ -1040,6 +1043,7 @@ static int nfs4_init_server(struct nfs_server *server,
 			data->nfs_server.protocol,
 			&timeparms,
 			data->minorversion,
+			data->nfs_server.nconnect,
 			data->net);
 	if (error < 0)
 		return error;
@@ -1124,6 +1128,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				rpc_protocol(parent_server->client),
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
+				parent_client->cl_nconnect,
 				parent_client->cl_net);
 	if (error < 0)
 		goto error;
@@ -1215,7 +1220,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	nfs_server_remove_lists(server);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
-				clp->cl_minorversion, net);
+				clp->cl_minorversion,
+				clp->cl_nconnect, net);
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2a70f34dffe8..b7e6b94d1246 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -55,6 +55,7 @@ struct nfs_client {
 	struct nfs_subversion *	cl_nfs_mod;	/* pointer to nfs version module */
 
 	u32			cl_minorversion;/* NFSv4 minorversion */
+	unsigned int		cl_nconnect;	/* Number of connections */
 	struct rpc_cred		*cl_machine_cred;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] pNFS: Fix a typo in pnfs_generic_alloc_ds_commits
  2017-05-02 16:38         ` [RFC PATCH 3/5] NFSv4: Allow multiple connections to NFSv4.x (x>0) servers Trond Myklebust
@ 2017-05-02 16:38           ` Trond Myklebust
  2017-05-02 16:38             ` [RFC PATCH 4/5] pNFS: Allow multiple connections to the DS Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

If the layout segment is invalid, we want to just resend the remaining
writes.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs_nfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7697ac0ff81a..ae600ab1a646 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -223,7 +223,7 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
 		 */
 		if (!pnfs_is_valid_lseg(bucket->clseg) &&
 		    !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
-			continue;
+			break;
 		data = nfs_commitdata_alloc(false);
 		if (!data)
 			break;
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RFC PATCH 4/5] pNFS: Allow multiple connections to the DS
  2017-05-02 16:38           ` [PATCH 3/3] pNFS: Fix a typo in pnfs_generic_alloc_ds_commits Trond Myklebust
@ 2017-05-02 16:38             ` Trond Myklebust
  2017-05-02 16:38               ` [RFC PATCH 5/5] NFS: Display the "nconnect" mount option if it is set Trond Myklebust
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

If the user specifies the -o nconnect=<number> mount option, and the transport
protocol is TCP, then set up <number> connections to the pNFS data server
as well. The connections will all go to the same IP address.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3client.c | 3 +++
 fs/nfs/nfs4client.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 7879f2a0fcfd..8c624c74ddbe 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -100,6 +100,9 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = mds_clp->cl_nconnect;
+
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index c9b10b7829f0..bfea1b232dd2 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -912,6 +912,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = mds_clp->cl_nconnect;
+
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [RFC PATCH 5/5] NFS: Display the "nconnect" mount option if it is set.
  2017-05-02 16:38             ` [RFC PATCH 4/5] pNFS: Allow multiple connections to the DS Trond Myklebust
@ 2017-05-02 16:38               ` Trond Myklebust
  0 siblings, 0 replies; 20+ messages in thread
From: Trond Myklebust @ 2017-05-02 16:38 UTC (permalink / raw)
  To: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 7eb48934dc79..0e07a6684235 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -673,6 +673,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	seq_printf(m, ",proto=%s",
 		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
 	rcu_read_unlock();
+	if (clp->cl_nconnect > 0)
+		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
 	if (version == 4) {
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 20:40               ` Trond Myklebust
@ 2017-05-04 20:42                 ` bfields
  0 siblings, 0 replies; 20+ messages in thread
From: bfields @ 2017-05-04 20:42 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: chuck.lever, linux-nfs

On Thu, May 04, 2017 at 08:40:07PM +0000, Trond Myklebust wrote:
> See https://tools.ietf.org/html/rfc5661#section-2.10.3.1
> 
> So, we probably should send the BIND_CONN_TO_SESSION after creating the
> session, but since that involves figuring out whether or not state
> protection was successfully negotiated, and since we have to support
> handling NFS4ERR_CONN_NOT_BOUND_TO_SESSION anyway, I'm all for just
> waiting for the server to send the error.

Makes sense to me.

--b.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 17:45             ` J. Bruce Fields
  2017-05-04 18:55               ` Chuck Lever
@ 2017-05-04 20:40               ` Trond Myklebust
  2017-05-04 20:42                 ` bfields
  1 sibling, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-05-04 20:40 UTC (permalink / raw)
  To: bfields, chuck.lever; +Cc: linux-nfs

T24gVGh1LCAyMDE3LTA1LTA0IGF0IDEzOjQ1IC0wNDAwLCBKLiBCcnVjZSBGaWVsZHMgd3JvdGU6
DQo+IE9uIFRodSwgTWF5IDA0LCAyMDE3IGF0IDAxOjM4OjM1UE0gLTA0MDAsIENodWNrIExldmVy
IHdyb3RlOg0KPiA+IA0KPiA+ID4gT24gTWF5IDQsIDIwMTcsIGF0IDE6MzYgUE0sIGJmaWVsZHNA
ZmllbGRzZXMub3JnIHdyb3RlOg0KPiA+ID4gDQo+ID4gPiBPbiBUaHUsIE1heSAwNCwgMjAxNyBh
dCAxMjowMToyOVBNIC0wNDAwLCBDaHVjayBMZXZlciB3cm90ZToNCj4gPiA+ID4gDQo+ID4gPiA+
ID4gT24gTWF5IDQsIDIwMTcsIGF0IDk6NDUgQU0sIENodWNrIExldmVyIDxjaHVjay5sZXZlckBv
cmFjbGUuYw0KPiA+ID4gPiA+IG9tPiB3cm90ZToNCj4gPiA+ID4gPiANCj4gPiA+ID4gPiAtIFRl
c3Rpbmcgd2l0aCBhIExpbnV4IHNlcnZlciBzaG93cyB0aGF0IHRoZSBiYXNpYyBORlMvUkRNQQ0K
PiA+ID4gPiA+IHBpZWNlcw0KPiA+ID4gPiA+IHdvcmssIGJ1dCBhbnkgT1BFTiBvcGVyYXRpb24g
Z2V0cyBORlM0RVJSX0dSQUNFLCBmb3JldmVyLA0KPiA+ID4gPiA+IHdoZW4gSSB1c2UNCj4gPiA+
ID4gPiBuY29ubmVjdCA+IDEuIEknbSBsb29raW5nIGludG8gaXQuDQo+ID4gPiA+IA0KPiA+ID4g
PiBSZXByb2R1Y2VkIHdpdGggTkZTdjQuMSwgVENQLCBhbmQgbmNvbm5lY3Q9Mi4NCj4gPiA+ID4g
DQo+ID4gPiA+IDM2M8KgwqDCoMKgwqDCoMKgwqDCoC8qDQo+ID4gPiA+IDM2NMKgwqDCoMKgwqDC
oMKgwqDCoMKgKiBSRkM1NjYxIDE4LjUxLjMNCj4gPiA+ID4gMzY1wqDCoMKgwqDCoMKgwqDCoMKg
wqAqIEJlZm9yZSBSRUNMQUlNX0NPTVBMRVRFIGRvbmUsIHNlcnZlciBzaG91bGQgZGVueQ0KPiA+
ID4gPiBuZXcgbG9jaw0KPiA+ID4gPiAzNjbCoMKgwqDCoMKgwqDCoMKgwqDCoCovDQo+ID4gPiA+
IDM2N8KgwqDCoMKgwqDCoMKgwqDCoGlmIChuZnNkNF9oYXNfc2Vzc2lvbihjc3RhdGUpICYmDQo+
ID4gPiA+IDM2OMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgIXRlc3RfYml0KE5GU0Q0X0NMSUVO
VF9SRUNMQUlNX0NPTVBMRVRFLA0KPiA+ID4gPiAzNjnCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDC
oMKgwqDCoMKgwqDCoMKgwqDCoMKgJmNzdGF0ZS0+c2Vzc2lvbi0+c2VfY2xpZW50LQ0KPiA+ID4g
PiA+Y2xfZmxhZ3MpICYmDQo+ID4gPiA+IDM3MMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgb3Bl
bi0+b3BfY2xhaW1fdHlwZSAhPQ0KPiA+ID4gPiBORlM0X09QRU5fQ0xBSU1fUFJFVklPVVMpDQo+
ID4gPiA+IDM3McKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqByZXR1cm4gbmZzZXJy
X2dyYWNlOw0KPiA+ID4gPiANCj4gPiA+ID4gU2VydmVyLXNpZGUgaW5zdHJ1bWVudGF0aW9uIGNv
bmZpcm1zOg0KPiA+ID4gPiANCj4gPiA+ID4gTWF5wqDCoDQgMTE6Mjg6Mjkga2xpbXQga2VybmVs
OiBuZnNkNF9vcGVuOiBoYXNfc2Vzc2lvbiByZXR1cm5zDQo+ID4gPiA+IHRydWUNCj4gPiA+ID4g
TWF5wqDCoDQgMTE6Mjg6Mjkga2xpbXQga2VybmVsOiBuZnNkNF9vcGVuOiBSRUNMQUlNX0NPTVBM
RVRFIGlzDQo+ID4gPiA+IGZhbHNlDQo+ID4gPiA+IE1hecKgwqA0IDExOjI4OjI5IGtsaW10IGtl
cm5lbDogbmZzZDRfb3BlbjogY2xhaW1fdHlwZSBpcyAwDQo+ID4gPiA+IA0KPiA+ID4gPiBOZXR3
b3JrIGNhcHR1cmUgc2hvd3MgdGhlIFJQQ3MgYXJlIGludGVybGVhdmVkIGJldHdlZW4gdGhlIHR3
bw0KPiA+ID4gPiBjb25uZWN0aW9ucyBhcyB0aGUgY2xpZW50IGVzdGFibGlzaGVzIGl0cyBsZWFz
ZSwgYW5kIHRoYXQNCj4gPiA+ID4gYXBwZWFycw0KPiA+ID4gPiB0byBiZSBjb25mdXNpbmcgdGhl
IHNlcnZlci4NCj4gPiA+ID4gDQo+ID4gPiA+IEMxOiBOVUxMIC0+IE5GUzRfT0sNCj4gPiA+ID4g
QzE6IEVYQ0hBTkdFX0lEIC0+IE5GUzRfT0sNCj4gPiA+ID4gQzI6IENSRUFURV9TRVNTSU9OIC0+
IE5GUzRfT0sNCj4gPiA+ID4gQzE6IFJFQ0xBSU1fQ09NUExFVEUgLT4gTkZTNEVSUl9DT05OX05P
VF9CT1VORF9UT19TRVNTSU9ODQo+ID4gPiANCj4gPiA+IFdoYXQgc2VjdXJpdHkgZmxhdm9ycyBh
cmUgaW52b2x2ZWQ/wqDCoEkgYmVsaWV2ZSB0aGUgY29ycmVjdA0KPiA+ID4gYmVoYXZpb3INCj4g
PiA+IGRlcGVuZHMgb24gd2hldGhlciBnc3MgaXMgaW4gdXNlIG9yIG5vdC4NCj4gPiANCj4gPiBU
aGUgbW91bnQgb3B0aW9ucyBhcmUgInNlYz1zeXMiIGJ1dCBib3RoIHNpZGVzIGhhdmUgYSBrZXl0
YWIuDQo+ID4gU28gdGhlIGxlYXNlIG1hbmFnZW1lbnQgb3BlcmF0aW9ucyBhcmUgZG9uZSB3aXRo
IGtyYjVpLg0KPiANCj4gT0suwqDCoEknbSBwcmV0dHkgc3VyZSB0aGUgY2xpZW50IG5lZWRzIHRv
IHNlbmQgQklORF9DT05OX1RPX1NFU1NJT04NCj4gYmVmb3JlIHN0ZXAgQzEuDQo+IA0KPiBNeSBt
ZW1vcnkgaXMgdGhhdCBvdmVyIGF1dGhfc3lzIHlvdSdyZSBhbGxvd2VkIHRvIHRyZWF0IGFueSBT
RVFVRU5DRQ0KPiBvdmVyIGEgbmV3IGNvbm5lY3Rpb24gYXMgaW1wbGljaXRseSBiaW5kaW5nIHRo
YXQgY29ubmVjdGlvbiB0byB0aGUNCj4gcmVmZXJlbmNlZCBzZXNzaW9uLCBidXQgb3ZlciBrcmI1
IHRoZSBzZXJ2ZXIncyByZXF1aXJlZCB0byByZXR1cm4NCj4gdGhhdA0KPiBOT1RfQk9VTkQgZXJy
b3IgaWYgdGhlIHNlcnZlciBza2lwcyB0aGUgQklORF9DT05OX1RPX1NFU1NJT04uDQo+IA0KPiBJ
IHRoaW5rIENSRUFURV9TRVNTSU9OIGlzIGFsbG93ZWQgYXMgbG9uZyBhcyB0aGUgcHJpbmNpcGFs
cyBhZ3JlZSwNCj4gYW5kDQo+IHRoYXQncyB3aHkgdGhlIGNhbGwgYXQgQzIgc3VjY2VlZHMuwqDC
oFNlZW1zIGEgbGl0dGxlIHdlaXJkLCB0aG91Z2guDQo+IA0KDQpTZWUgaHR0cHM6Ly90b29scy5p
ZXRmLm9yZy9odG1sL3JmYzU2NjEjc2VjdGlvbi0yLjEwLjMuMQ0KDQpTbywgd2UgcHJvYmFibHkg
c2hvdWxkIHNlbmQgdGhlIEJJTkRfQ09OTl9UT19TRVNTSU9OIGFmdGVyIGNyZWF0aW5nIHRoZQ0K
c2Vzc2lvbiwgYnV0IHNpbmNlIHRoYXQgaW52b2x2ZXMgZmlndXJpbmcgb3V0IHdoZXRoZXIgb3Ig
bm90IHN0YXRlDQpwcm90ZWN0aW9uIHdhcyBzdWNjZXNzZnVsbHkgbmVnb3RpYXRlZCwgYW5kIHNp
bmNlIHdlIGhhdmUgdG8gc3VwcG9ydA0KaGFuZGxpbmcgTkZTNEVSUl9DT05OX05PVF9CT1VORF9U
T19TRVNTSU9OIGFueXdheSwgSSdtIGFsbCBmb3IganVzdA0Kd2FpdGluZyBmb3IgdGhlIHNlcnZl
ciB0byBzZW5kIHRoZSBlcnJvci4NCg0KPiAtLWIuDQo+IA0KPiA+IA0KPiA+IA0KPiA+ID4gLS1i
Lg0KPiA+ID4gDQo+ID4gPiA+IEMxOiBQVVRST09URkggfCBHRVRBVFRSIC0+IE5GUzRFUlJfU0VR
X01JU09SREVSRUQNCj4gPiA+ID4gQzI6IFNFUVVFTkNFIC0+IE5GUzRfT0sNCj4gPiA+ID4gQzE6
IFBVVFJPT1RGSCB8IEdFVEFUVFIgLT4gTkZTNEVSUl9DT05OX05PVF9CT1VORF9UT19TRVNTSU9O
DQo+ID4gPiA+IEMxOiBCSU5EX0NPTk5fVE9fU0VTU0lPTiAtPiBORlM0X09LDQo+ID4gPiA+IEMy
OiBCSU5EX0NPTk5fVE9fU0VTU0lPTiAtPiBORlM0X09LDQo+ID4gPiA+IEMyOiBQVVRST09URkgg
fCBHRVRBVFRSIC0+IE5GUzRFUlJfU0VRX01JU09SREVSRUQNCj4gPiA+ID4gDQo+ID4gPiA+IC4u
Li4gbWl4IG9mIEdFVEFUVFJzIGFuZCBvdGhlciBzaW1wbGUgcmVxdWVzdHMgLi4uLg0KPiA+ID4g
PiANCj4gPiA+ID4gQzE6IE9QRU4gLT4gTkZTNEVSUl9HUkFDRQ0KPiA+ID4gPiBDMjogT1BFTiAt
PiBORlM0RVJSX0dSQUNFDQo+ID4gPiA+IA0KPiA+ID4gPiBUaGUgUkVDTEFJTV9DT01QTEVURSBv
cGVyYXRpb24gZmFpbGVkLCBhbmQgdGhlIGNsaWVudCBkb2VzIG5vdA0KPiA+ID4gPiByZXRyeSBp
dC4gVGhhdCBsZWF2ZXMgaXRzIGxlYXNlIHN0dWNrIGluIEdSQUNFLg0KPiA+ID4gPiANCj4gPiA+
ID4gDQo+ID4gPiA+IC0tDQo+ID4gPiA+IENodWNrIExldmVyDQo+ID4gPiA+IA0KPiA+ID4gPiAN
Cj4gPiA+ID4gDQo+ID4gPiA+IC0tDQo+ID4gPiA+IFRvIHVuc3Vic2NyaWJlIGZyb20gdGhpcyBs
aXN0OiBzZW5kIHRoZSBsaW5lICJ1bnN1YnNjcmliZQ0KPiA+ID4gPiBsaW51eC1uZnMiIGluDQo+
ID4gPiA+IHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0byBtYWpvcmRvbW9Admdlci5rZXJuZWwub3Jn
DQo+ID4gPiA+IE1vcmUgbWFqb3Jkb21vIGluZm8gYXTCoMKgaHR0cDovL3ZnZXIua2VybmVsLm9y
Zy9tYWpvcmRvbW8taW5mby5oDQo+ID4gPiA+IHRtbA0KPiA+ID4gDQo+ID4gPiAtLQ0KPiA+ID4g
VG8gdW5zdWJzY3JpYmUgZnJvbSB0aGlzIGxpc3Q6IHNlbmQgdGhlIGxpbmUgInVuc3Vic2NyaWJl
IGxpbnV4LQ0KPiA+ID4gbmZzIiBpbg0KPiA+ID4gdGhlIGJvZHkgb2YgYSBtZXNzYWdlIHRvIG1h
am9yZG9tb0B2Z2VyLmtlcm5lbC5vcmcNCj4gPiA+IE1vcmUgbWFqb3Jkb21vIGluZm8gYXTCoMKg
aHR0cDovL3ZnZXIua2VybmVsLm9yZy9tYWpvcmRvbW8taW5mby5odG0NCj4gPiA+IGwNCj4gPiAN
Cj4gPiAtLQ0KPiA+IENodWNrIExldmVyDQo+ID4gDQo+ID4gDQo+ID4gDQo+ID4gLS0NCj4gPiBU
byB1bnN1YnNjcmliZSBmcm9tIHRoaXMgbGlzdDogc2VuZCB0aGUgbGluZSAidW5zdWJzY3JpYmUg
bGludXgtDQo+ID4gbmZzIiBpbg0KPiA+IHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0byBtYWpvcmRv
bW9Admdlci5rZXJuZWwub3JnDQo+ID4gTW9yZSBtYWpvcmRvbW8gaW5mbyBhdMKgwqBodHRwOi8v
dmdlci5rZXJuZWwub3JnL21ham9yZG9tby1pbmZvLmh0bWwNCj4gDQo+IC0tDQo+IFRvIHVuc3Vi
c2NyaWJlIGZyb20gdGhpcyBsaXN0OiBzZW5kIHRoZSBsaW5lICJ1bnN1YnNjcmliZSBsaW51eC1u
ZnMiDQo+IGluDQo+IHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0byBtYWpvcmRvbW9Admdlci5rZXJu
ZWwub3JnDQo+IE1vcmUgbWFqb3Jkb21vIGluZm8gYXTCoMKgaHR0cDovL3ZnZXIua2VybmVsLm9y
Zy9tYWpvcmRvbW8taW5mby5odG1sDQo+IA0KLS0gDQpUcm9uZCBNeWtsZWJ1c3QNCkxpbnV4IE5G
UyBjbGllbnQgbWFpbnRhaW5lciwgUHJpbWFyeURhdGENCnRyb25kLm15a2xlYnVzdEBwcmltYXJ5
ZGF0YS5jb20NCg==


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 18:55               ` Chuck Lever
@ 2017-05-04 19:58                 ` J. Bruce Fields
  0 siblings, 0 replies; 20+ messages in thread
From: J. Bruce Fields @ 2017-05-04 19:58 UTC (permalink / raw)
  To: Chuck Lever; +Cc: Trond Myklebust, Linux NFS Mailing List

On Thu, May 04, 2017 at 02:55:06PM -0400, Chuck Lever wrote:
> 
> > On May 4, 2017, at 1:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Thu, May 04, 2017 at 01:38:35PM -0400, Chuck Lever wrote:
> >> 
> >>> On May 4, 2017, at 1:36 PM, bfields@fieldses.org wrote:
> >>> 
> >>> On Thu, May 04, 2017 at 12:01:29PM -0400, Chuck Lever wrote:
> >>>> 
> >>>>> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> >>>>> 
> >>>>> - Testing with a Linux server shows that the basic NFS/RDMA pieces
> >>>>> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
> >>>>> nconnect > 1. I'm looking into it.
> >>>> 
> >>>> Reproduced with NFSv4.1, TCP, and nconnect=2.
> >>>> 
> >>>> 363         /*
> >>>> 364          * RFC5661 18.51.3
> >>>> 365          * Before RECLAIM_COMPLETE done, server should deny new lock
> >>>> 366          */
> >>>> 367         if (nfsd4_has_session(cstate) &&
> >>>> 368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
> >>>> 369                       &cstate->session->se_client->cl_flags) &&
> >>>> 370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
> >>>> 371                 return nfserr_grace;
> >>>> 
> >>>> Server-side instrumentation confirms:
> >>>> 
> >>>> May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
> >>>> May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
> >>>> May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0
> >>>> 
> >>>> Network capture shows the RPCs are interleaved between the two
> >>>> connections as the client establishes its lease, and that appears
> >>>> to be confusing the server.
> >>>> 
> >>>> C1: NULL -> NFS4_OK
> >>>> C1: EXCHANGE_ID -> NFS4_OK
> >>>> C2: CREATE_SESSION -> NFS4_OK
> >>>> C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
> >>> 
> >>> What security flavors are involved?  I believe the correct behavior
> >>> depends on whether gss is in use or not.
> >> 
> >> The mount options are "sec=sys" but both sides have a keytab.
> >> So the lease management operations are done with krb5i.
> > 
> > OK.  I'm pretty sure the client needs to send BIND_CONN_TO_SESSION
> > before step C1.
> > 
> > My memory is that over auth_sys you're allowed to treat any SEQUENCE
> > over a new connection as implicitly binding that connection to the
> > referenced session, but over krb5 the server's required to return that
> > NOT_BOUND error if the server skips the BIND_CONN_TO_SESSION.
> 
> Ah, that would explain why nconnect=[234] is working against my
> Solaris 12 server: no keytab on that server means lease management
> is done using plain-old AUTH_SYS.
> 
> Multiple connections are now handled entirely by the RPC layer,
> and are opened and used at rpc_clnt creation time. The NFS client
> is not aware (except for allowing more than one connection to be
> used) and relies on its own recovery mechanisms to deal with
> exceptions that might arise. IOW it doesn't seem to know that an
> extra BC2S is needed, nor does it know where in the RPC stream
> to insert that operation.
> 
> Seems to me a good approach would be to handle server trunking
> discovery and lease establishment using a single connection, and
> then open more connections. A conservative approach might actually
> hold off on opening additional connections until there are enough
> RPC transactions being initiated in parallel to warrant it. Or, if
> @nconnect > 1, use a single connection to perform lease management,
> and open @nconnect additional connections that handle only per-
> mount I/O activity.
> 
> 
> > I think CREATE_SESSION is allowed as long as the principals agree, and
> > that's why the call at C2 succeeds.  Seems a little weird, though.
> 
> Well, there's no SEQUENCE operation in that COMPOUND. No session
> or connection to use there, I think the principal and client ID
> are the only way to recognize the target of the operation?

I'm just not clear why the explicit BIND_CONN_TO_SESSION is required in
the gss case.

Actually, it's not gss exactly, it's the state protection level:

	If, when the client ID was created, the client opted for
	SP4_NONE state protection, the client is not required to use
	BIND_CONN_TO_SESSION to associate the connection with the
	session, unless the client wishes to associate the connection
	with the backchannel.  When SP4_NONE protection is used, simply
	sending a COMPOUND request with a SEQUENCE operation is
	sufficient to associate the connection with the session
	specified in SEQUENCE.

Anyway.

--b.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 17:45             ` J. Bruce Fields
@ 2017-05-04 18:55               ` Chuck Lever
  2017-05-04 19:58                 ` J. Bruce Fields
  2017-05-04 20:40               ` Trond Myklebust
  1 sibling, 1 reply; 20+ messages in thread
From: Chuck Lever @ 2017-05-04 18:55 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Linux NFS Mailing List


> On May 4, 2017, at 1:45 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Thu, May 04, 2017 at 01:38:35PM -0400, Chuck Lever wrote:
>> 
>>> On May 4, 2017, at 1:36 PM, bfields@fieldses.org wrote:
>>> 
>>> On Thu, May 04, 2017 at 12:01:29PM -0400, Chuck Lever wrote:
>>>> 
>>>>> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>>>>> 
>>>>> - Testing with a Linux server shows that the basic NFS/RDMA pieces
>>>>> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
>>>>> nconnect > 1. I'm looking into it.
>>>> 
>>>> Reproduced with NFSv4.1, TCP, and nconnect=2.
>>>> 
>>>> 363         /*
>>>> 364          * RFC5661 18.51.3
>>>> 365          * Before RECLAIM_COMPLETE done, server should deny new lock
>>>> 366          */
>>>> 367         if (nfsd4_has_session(cstate) &&
>>>> 368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
>>>> 369                       &cstate->session->se_client->cl_flags) &&
>>>> 370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
>>>> 371                 return nfserr_grace;
>>>> 
>>>> Server-side instrumentation confirms:
>>>> 
>>>> May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
>>>> May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
>>>> May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0
>>>> 
>>>> Network capture shows the RPCs are interleaved between the two
>>>> connections as the client establishes its lease, and that appears
>>>> to be confusing the server.
>>>> 
>>>> C1: NULL -> NFS4_OK
>>>> C1: EXCHANGE_ID -> NFS4_OK
>>>> C2: CREATE_SESSION -> NFS4_OK
>>>> C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
>>> 
>>> What security flavors are involved?  I believe the correct behavior
>>> depends on whether gss is in use or not.
>> 
>> The mount options are "sec=sys" but both sides have a keytab.
>> So the lease management operations are done with krb5i.
> 
> OK.  I'm pretty sure the client needs to send BIND_CONN_TO_SESSION
> before step C1.
> 
> My memory is that over auth_sys you're allowed to treat any SEQUENCE
> over a new connection as implicitly binding that connection to the
> referenced session, but over krb5 the server's required to return that
> NOT_BOUND error if the server skips the BIND_CONN_TO_SESSION.

Ah, that would explain why nconnect=[234] is working against my
Solaris 12 server: no keytab on that server means lease management
is done using plain-old AUTH_SYS.

Multiple connections are now handled entirely by the RPC layer,
and are opened and used at rpc_clnt creation time. The NFS client
is not aware (except for allowing more than one connection to be
used) and relies on its own recovery mechanisms to deal with
exceptions that might arise. IOW it doesn't seem to know that an
extra BC2S is needed, nor does it know where in the RPC stream
to insert that operation.

Seems to me a good approach would be to handle server trunking
discovery and lease establishment using a single connection, and
then open more connections. A conservative approach might actually
hold off on opening additional connections until there are enough
RPC transactions being initiated in parallel to warrant it. Or, if
@nconnect > 1, use a single connection to perform lease management,
and open @nconnect additional connections that handle only per-
mount I/O activity.


> I think CREATE_SESSION is allowed as long as the principals agree, and
> that's why the call at C2 succeeds.  Seems a little weird, though.

Well, there's no SEQUENCE operation in that COMPOUND. No session
or connection to use there, I think the principal and client ID
are the only way to recognize the target of the operation?


> --b.
> 
>> 
>> 
>>> --b.
>>> 
>>>> C1: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
>>>> C2: SEQUENCE -> NFS4_OK
>>>> C1: PUTROOTFH | GETATTR -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
>>>> C1: BIND_CONN_TO_SESSION -> NFS4_OK
>>>> C2: BIND_CONN_TO_SESSION -> NFS4_OK
>>>> C2: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
>>>> 
>>>> .... mix of GETATTRs and other simple requests ....
>>>> 
>>>> C1: OPEN -> NFS4ERR_GRACE
>>>> C2: OPEN -> NFS4ERR_GRACE
>>>> 
>>>> The RECLAIM_COMPLETE operation failed, and the client does not
>>>> retry it. That leaves its lease stuck in GRACE.
>>>> 
>>>> 
>>>> --
>>>> Chuck Lever
>>>> 
>>>> 
>>>> 
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 17:38           ` Chuck Lever
@ 2017-05-04 17:45             ` J. Bruce Fields
  2017-05-04 18:55               ` Chuck Lever
  2017-05-04 20:40               ` Trond Myklebust
  0 siblings, 2 replies; 20+ messages in thread
From: J. Bruce Fields @ 2017-05-04 17:45 UTC (permalink / raw)
  To: Chuck Lever; +Cc: Trond Myklebust, Linux NFS Mailing List

On Thu, May 04, 2017 at 01:38:35PM -0400, Chuck Lever wrote:
> 
> > On May 4, 2017, at 1:36 PM, bfields@fieldses.org wrote:
> > 
> > On Thu, May 04, 2017 at 12:01:29PM -0400, Chuck Lever wrote:
> >> 
> >>> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> >>> 
> >>> - Testing with a Linux server shows that the basic NFS/RDMA pieces
> >>> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
> >>> nconnect > 1. I'm looking into it.
> >> 
> >> Reproduced with NFSv4.1, TCP, and nconnect=2.
> >> 
> >> 363         /*
> >> 364          * RFC5661 18.51.3
> >> 365          * Before RECLAIM_COMPLETE done, server should deny new lock
> >> 366          */
> >> 367         if (nfsd4_has_session(cstate) &&
> >> 368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
> >> 369                       &cstate->session->se_client->cl_flags) &&
> >> 370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
> >> 371                 return nfserr_grace;
> >> 
> >> Server-side instrumentation confirms:
> >> 
> >> May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
> >> May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
> >> May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0
> >> 
> >> Network capture shows the RPCs are interleaved between the two
> >> connections as the client establishes its lease, and that appears
> >> to be confusing the server.
> >> 
> >> C1: NULL -> NFS4_OK
> >> C1: EXCHANGE_ID -> NFS4_OK
> >> C2: CREATE_SESSION -> NFS4_OK
> >> C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
> > 
> > What security flavors are involved?  I believe the correct behavior
> > depends on whether gss is in use or not.
> 
> The mount options are "sec=sys" but both sides have a keytab.
> So the lease management operations are done with krb5i.

OK.  I'm pretty sure the client needs to send BIND_CONN_TO_SESSION
before step C1.

My memory is that over auth_sys you're allowed to treat any SEQUENCE
over a new connection as implicitly binding that connection to the
referenced session, but over krb5 the server's required to return that
NOT_BOUND error if the server skips the BIND_CONN_TO_SESSION.

I think CREATE_SESSION is allowed as long as the principals agree, and
that's why the call at C2 succeeds.  Seems a little weird, though.

--b.

> 
> 
> > --b.
> > 
> >> C1: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
> >> C2: SEQUENCE -> NFS4_OK
> >> C1: PUTROOTFH | GETATTR -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
> >> C1: BIND_CONN_TO_SESSION -> NFS4_OK
> >> C2: BIND_CONN_TO_SESSION -> NFS4_OK
> >> C2: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
> >> 
> >> .... mix of GETATTRs and other simple requests ....
> >> 
> >> C1: OPEN -> NFS4ERR_GRACE
> >> C2: OPEN -> NFS4ERR_GRACE
> >> 
> >> The RECLAIM_COMPLETE operation failed, and the client does not
> >> retry it. That leaves its lease stuck in GRACE.
> >> 
> >> 
> >> --
> >> Chuck Lever
> >> 
> >> 
> >> 
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> >> the body of a message to majordomo@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> Chuck Lever
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 17:36         ` J. Bruce Fields
@ 2017-05-04 17:38           ` Chuck Lever
  2017-05-04 17:45             ` J. Bruce Fields
  0 siblings, 1 reply; 20+ messages in thread
From: Chuck Lever @ 2017-05-04 17:38 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Trond Myklebust, Linux NFS Mailing List


> On May 4, 2017, at 1:36 PM, bfields@fieldses.org wrote:
> 
> On Thu, May 04, 2017 at 12:01:29PM -0400, Chuck Lever wrote:
>> 
>>> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>>> 
>>> - Testing with a Linux server shows that the basic NFS/RDMA pieces
>>> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
>>> nconnect > 1. I'm looking into it.
>> 
>> Reproduced with NFSv4.1, TCP, and nconnect=2.
>> 
>> 363         /*
>> 364          * RFC5661 18.51.3
>> 365          * Before RECLAIM_COMPLETE done, server should deny new lock
>> 366          */
>> 367         if (nfsd4_has_session(cstate) &&
>> 368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
>> 369                       &cstate->session->se_client->cl_flags) &&
>> 370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
>> 371                 return nfserr_grace;
>> 
>> Server-side instrumentation confirms:
>> 
>> May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
>> May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
>> May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0
>> 
>> Network capture shows the RPCs are interleaved between the two
>> connections as the client establishes its lease, and that appears
>> to be confusing the server.
>> 
>> C1: NULL -> NFS4_OK
>> C1: EXCHANGE_ID -> NFS4_OK
>> C2: CREATE_SESSION -> NFS4_OK
>> C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
> 
> What security flavors are involved?  I believe the correct behavior
> depends on whether gss is in use or not.

The mount options are "sec=sys" but both sides have a keytab.
So the lease management operations are done with krb5i.


> --b.
> 
>> C1: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
>> C2: SEQUENCE -> NFS4_OK
>> C1: PUTROOTFH | GETATTR -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
>> C1: BIND_CONN_TO_SESSION -> NFS4_OK
>> C2: BIND_CONN_TO_SESSION -> NFS4_OK
>> C2: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
>> 
>> .... mix of GETATTRs and other simple requests ....
>> 
>> C1: OPEN -> NFS4ERR_GRACE
>> C2: OPEN -> NFS4ERR_GRACE
>> 
>> The RECLAIM_COMPLETE operation failed, and the client does not
>> retry it. That leaves its lease stuck in GRACE.
>> 
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 16:01       ` Chuck Lever
@ 2017-05-04 17:36         ` J. Bruce Fields
  2017-05-04 17:38           ` Chuck Lever
  0 siblings, 1 reply; 20+ messages in thread
From: J. Bruce Fields @ 2017-05-04 17:36 UTC (permalink / raw)
  To: Chuck Lever; +Cc: Trond Myklebust, Linux NFS Mailing List

On Thu, May 04, 2017 at 12:01:29PM -0400, Chuck Lever wrote:
> 
> > On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> > 
> > - Testing with a Linux server shows that the basic NFS/RDMA pieces
> > work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
> > nconnect > 1. I'm looking into it.
> 
> Reproduced with NFSv4.1, TCP, and nconnect=2.
> 
> 363         /*
> 364          * RFC5661 18.51.3
> 365          * Before RECLAIM_COMPLETE done, server should deny new lock
> 366          */
> 367         if (nfsd4_has_session(cstate) &&
> 368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
> 369                       &cstate->session->se_client->cl_flags) &&
> 370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
> 371                 return nfserr_grace;
> 
> Server-side instrumentation confirms:
> 
> May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
> May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
> May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0
> 
> Network capture shows the RPCs are interleaved between the two
> connections as the client establishes its lease, and that appears
> to be confusing the server.
> 
> C1: NULL -> NFS4_OK
> C1: EXCHANGE_ID -> NFS4_OK
> C2: CREATE_SESSION -> NFS4_OK
> C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION

What security flavors are involved?  I believe the correct behavior
depends on whether gss is in use or not.

--b.

> C1: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
> C2: SEQUENCE -> NFS4_OK
> C1: PUTROOTFH | GETATTR -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
> C1: BIND_CONN_TO_SESSION -> NFS4_OK
> C2: BIND_CONN_TO_SESSION -> NFS4_OK
> C2: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
> 
> .... mix of GETATTRs and other simple requests ....
> 
> C1: OPEN -> NFS4ERR_GRACE
> C2: OPEN -> NFS4ERR_GRACE
> 
> The RECLAIM_COMPLETE operation failed, and the client does not
> retry it. That leaves its lease stuck in GRACE.
> 
> 
> --
> Chuck Lever
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 13:45     ` Chuck Lever
  2017-05-04 13:53       ` Chuck Lever
@ 2017-05-04 16:01       ` Chuck Lever
  2017-05-04 17:36         ` J. Bruce Fields
  1 sibling, 1 reply; 20+ messages in thread
From: Chuck Lever @ 2017-05-04 16:01 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List


> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> 
> - Testing with a Linux server shows that the basic NFS/RDMA pieces
> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
> nconnect > 1. I'm looking into it.

Reproduced with NFSv4.1, TCP, and nconnect=2.

363         /*
364          * RFC5661 18.51.3
365          * Before RECLAIM_COMPLETE done, server should deny new lock
366          */
367         if (nfsd4_has_session(cstate) &&
368             !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
369                       &cstate->session->se_client->cl_flags) &&
370             open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
371                 return nfserr_grace;

Server-side instrumentation confirms:

May  4 11:28:29 klimt kernel: nfsd4_open: has_session returns true
May  4 11:28:29 klimt kernel: nfsd4_open: RECLAIM_COMPLETE is false
May  4 11:28:29 klimt kernel: nfsd4_open: claim_type is 0

Network capture shows the RPCs are interleaved between the two
connections as the client establishes its lease, and that appears
to be confusing the server.

C1: NULL -> NFS4_OK
C1: EXCHANGE_ID -> NFS4_OK
C2: CREATE_SESSION -> NFS4_OK
C1: RECLAIM_COMPLETE -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
C1: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED
C2: SEQUENCE -> NFS4_OK
C1: PUTROOTFH | GETATTR -> NFS4ERR_CONN_NOT_BOUND_TO_SESSION
C1: BIND_CONN_TO_SESSION -> NFS4_OK
C2: BIND_CONN_TO_SESSION -> NFS4_OK
C2: PUTROOTFH | GETATTR -> NFS4ERR_SEQ_MISORDERED

.... mix of GETATTRs and other simple requests ....

C1: OPEN -> NFS4ERR_GRACE
C2: OPEN -> NFS4ERR_GRACE

The RECLAIM_COMPLETE operation failed, and the client does not
retry it. That leaves its lease stuck in GRACE.


--
Chuck Lever




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-05-04 13:45     ` Chuck Lever
@ 2017-05-04 13:53       ` Chuck Lever
  2017-05-04 16:01       ` Chuck Lever
  1 sibling, 0 replies; 20+ messages in thread
From: Chuck Lever @ 2017-05-04 13:53 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List


> On May 4, 2017, at 9:45 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> 
> Hi Trond-
> 
> 
>> On Apr 28, 2017, at 1:25 PM, Trond Myklebust <trond.myklebust@primarydata.com> wrote:
>> 
>> Allow the user to specify that the client should use multiple connections
>> to the server. For the moment, this functionality will be limited to
>> TCP and to NFSv4.x (x>0).
> 
> Some initial reactions:
> 
> - 5/5 could be squashed into this patch (2/5).
> 
> - 4/5 adds support for using NFSv3 with a DS. Why can't you add NFSv3
> support for multiple connections in the non-pNFS case? If there is a
> good reason, it should be stated in 4/5's patch description or added
> as a comment somewhere in the source code.
> 
> - Testing with a Linux server shows that the basic NFS/RDMA pieces
> work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
> nconnect > 1. I'm looking into it.
> 
> - Testing with a Solaris 12 server prototype that supports NFSv4.1
> works fine with nconnect=[23]. Not seeing much performance improvement
> there because the server is using QDR and a single SATA SSD.
> 
> Thus I don't see a strong need to keep the TCP-only limitation. However,
> if you do keep it, the logic that implements the second sentence in the
> patch description above is added in 3/5. Should this sentence be in that
> patch description instead? Or, instead:
> 
> s/For the moment/In a subsequent patch

Oops, I forgot to mention: mountstats data looks a little confused
when nconnect > 1. For example:

WRITE:
           3075342 ops (131%)
        avg bytes sent per op: 26829    avg bytes received per op: 113
        backlog wait: 162.375128        RTT: 1.481101   total execute time: 163.861735 (milliseconds)

Haven't looked closely at that 131%, but it could be either the kernel
or the script itself is assuming one connection per mount.


>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>> fs/nfs/internal.h |  1 +
>> fs/nfs/super.c    | 10 ++++++++++
>> 2 files changed, 11 insertions(+)
>> 
>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
>> index 31b26cf1b476..31757a742e9b 100644
>> --- a/fs/nfs/internal.h
>> +++ b/fs/nfs/internal.h
>> @@ -117,6 +117,7 @@ struct nfs_parsed_mount_data {
>> 		char			*export_path;
>> 		int			port;
>> 		unsigned short		protocol;
>> +		unsigned short		nconnect;
>> 	} nfs_server;
>> 
>> 	struct security_mnt_opts lsm_opts;
>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>> index 54e0f9f2dd94..7eb48934dc79 100644
>> --- a/fs/nfs/super.c
>> +++ b/fs/nfs/super.c
>> @@ -76,6 +76,8 @@
>> #define NFS_DEFAULT_VERSION 2
>> #endif
>> 
>> +#define NFS_MAX_CONNECTIONS 16
>> +
>> enum {
>> 	/* Mount options that take no arguments */
>> 	Opt_soft, Opt_hard,
>> @@ -107,6 +109,7 @@ enum {
>> 	Opt_nfsvers,
>> 	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
>> 	Opt_addr, Opt_mountaddr, Opt_clientaddr,
>> +	Opt_nconnect,
>> 	Opt_lookupcache,
>> 	Opt_fscache_uniq,
>> 	Opt_local_lock,
>> @@ -179,6 +182,8 @@ static const match_table_t nfs_mount_option_tokens = {
>> 	{ Opt_mounthost, "mounthost=%s" },
>> 	{ Opt_mountaddr, "mountaddr=%s" },
>> 
>> +	{ Opt_nconnect, "nconnect=%s" },
>> +
>> 	{ Opt_lookupcache, "lookupcache=%s" },
>> 	{ Opt_fscache_uniq, "fsc=%s" },
>> 	{ Opt_local_lock, "local_lock=%s" },
>> @@ -1544,6 +1549,11 @@ static int nfs_parse_mount_options(char *raw,
>> 			if (mnt->mount_server.addrlen == 0)
>> 				goto out_invalid_address;
>> 			break;
>> +		case Opt_nconnect:
>> +			if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
>> +				goto out_invalid_value;
>> +			mnt->nfs_server.nconnect = option;
>> +			break;
>> 		case Opt_lookupcache:
>> 			string = match_strdup(args);
>> 			if (string == NULL)
>> -- 
>> 2.9.3
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> Chuck Lever
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-04-28 17:25   ` [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use Trond Myklebust
@ 2017-05-04 13:45     ` Chuck Lever
  2017-05-04 13:53       ` Chuck Lever
  2017-05-04 16:01       ` Chuck Lever
  0 siblings, 2 replies; 20+ messages in thread
From: Chuck Lever @ 2017-05-04 13:45 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux NFS Mailing List

Hi Trond-


> On Apr 28, 2017, at 1:25 PM, Trond Myklebust <trond.myklebust@primarydata.com> wrote:
> 
> Allow the user to specify that the client should use multiple connections
> to the server. For the moment, this functionality will be limited to
> TCP and to NFSv4.x (x>0).

Some initial reactions:

- 5/5 could be squashed into this patch (2/5).

- 4/5 adds support for using NFSv3 with a DS. Why can't you add NFSv3
support for multiple connections in the non-pNFS case? If there is a
good reason, it should be stated in 4/5's patch description or added
as a comment somewhere in the source code.

- Testing with a Linux server shows that the basic NFS/RDMA pieces
work, but any OPEN operation gets NFS4ERR_GRACE, forever, when I use
nconnect > 1. I'm looking into it.

- Testing with a Solaris 12 server prototype that supports NFSv4.1
works fine with nconnect=[23]. Not seeing much performance improvement
there because the server is using QDR and a single SATA SSD.

Thus I don't see a strong need to keep the TCP-only limitation. However,
if you do keep it, the logic that implements the second sentence in the
patch description above is added in 3/5. Should this sentence be in that
patch description instead? Or, instead:

s/For the moment/In a subsequent patch


> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
> fs/nfs/internal.h |  1 +
> fs/nfs/super.c    | 10 ++++++++++
> 2 files changed, 11 insertions(+)
> 
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 31b26cf1b476..31757a742e9b 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -117,6 +117,7 @@ struct nfs_parsed_mount_data {
> 		char			*export_path;
> 		int			port;
> 		unsigned short		protocol;
> +		unsigned short		nconnect;
> 	} nfs_server;
> 
> 	struct security_mnt_opts lsm_opts;
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index 54e0f9f2dd94..7eb48934dc79 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -76,6 +76,8 @@
> #define NFS_DEFAULT_VERSION 2
> #endif
> 
> +#define NFS_MAX_CONNECTIONS 16
> +
> enum {
> 	/* Mount options that take no arguments */
> 	Opt_soft, Opt_hard,
> @@ -107,6 +109,7 @@ enum {
> 	Opt_nfsvers,
> 	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
> 	Opt_addr, Opt_mountaddr, Opt_clientaddr,
> +	Opt_nconnect,
> 	Opt_lookupcache,
> 	Opt_fscache_uniq,
> 	Opt_local_lock,
> @@ -179,6 +182,8 @@ static const match_table_t nfs_mount_option_tokens = {
> 	{ Opt_mounthost, "mounthost=%s" },
> 	{ Opt_mountaddr, "mountaddr=%s" },
> 
> +	{ Opt_nconnect, "nconnect=%s" },
> +
> 	{ Opt_lookupcache, "lookupcache=%s" },
> 	{ Opt_fscache_uniq, "fsc=%s" },
> 	{ Opt_local_lock, "local_lock=%s" },
> @@ -1544,6 +1549,11 @@ static int nfs_parse_mount_options(char *raw,
> 			if (mnt->mount_server.addrlen == 0)
> 				goto out_invalid_address;
> 			break;
> +		case Opt_nconnect:
> +			if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
> +				goto out_invalid_value;
> +			mnt->nfs_server.nconnect = option;
> +			break;
> 		case Opt_lookupcache:
> 			string = match_strdup(args);
> 			if (string == NULL)
> -- 
> 2.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever




^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use
  2017-04-28 17:25 ` [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections Trond Myklebust
@ 2017-04-28 17:25   ` Trond Myklebust
  2017-05-04 13:45     ` Chuck Lever
  0 siblings, 1 reply; 20+ messages in thread
From: Trond Myklebust @ 2017-04-28 17:25 UTC (permalink / raw)
  To: linux-nfs

Allow the user to specify that the client should use multiple connections
to the server. For the moment, this functionality will be limited to
TCP and to NFSv4.x (x>0).

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h |  1 +
 fs/nfs/super.c    | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31b26cf1b476..31757a742e9b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -117,6 +117,7 @@ struct nfs_parsed_mount_data {
 		char			*export_path;
 		int			port;
 		unsigned short		protocol;
+		unsigned short		nconnect;
 	} nfs_server;
 
 	struct security_mnt_opts lsm_opts;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 54e0f9f2dd94..7eb48934dc79 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -76,6 +76,8 @@
 #define NFS_DEFAULT_VERSION 2
 #endif
 
+#define NFS_MAX_CONNECTIONS 16
+
 enum {
 	/* Mount options that take no arguments */
 	Opt_soft, Opt_hard,
@@ -107,6 +109,7 @@ enum {
 	Opt_nfsvers,
 	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
 	Opt_addr, Opt_mountaddr, Opt_clientaddr,
+	Opt_nconnect,
 	Opt_lookupcache,
 	Opt_fscache_uniq,
 	Opt_local_lock,
@@ -179,6 +182,8 @@ static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_mounthost, "mounthost=%s" },
 	{ Opt_mountaddr, "mountaddr=%s" },
 
+	{ Opt_nconnect, "nconnect=%s" },
+
 	{ Opt_lookupcache, "lookupcache=%s" },
 	{ Opt_fscache_uniq, "fsc=%s" },
 	{ Opt_local_lock, "local_lock=%s" },
@@ -1544,6 +1549,11 @@ static int nfs_parse_mount_options(char *raw,
 			if (mnt->mount_server.addrlen == 0)
 				goto out_invalid_address;
 			break;
+		case Opt_nconnect:
+			if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
+				goto out_invalid_value;
+			mnt->nfs_server.nconnect = option;
+			break;
 		case Opt_lookupcache:
 			string = match_strdup(args);
 			if (string == NULL)
-- 
2.9.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-05-04 20:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-02 16:38 [PATCH 0/3] Fix up a couple of issues around layout handling Trond Myklebust
2017-05-02 16:38 ` [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections Trond Myklebust
2017-05-02 16:38   ` [PATCH 1/3] pNFS: Don't clear the layout return info if there are segments to return Trond Myklebust
2017-05-02 16:38     ` [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use Trond Myklebust
2017-05-02 16:38       ` [PATCH 2/3] pNFS: Fix a deadlock when coalescing writes and returning the layout Trond Myklebust
2017-05-02 16:38         ` [RFC PATCH 3/5] NFSv4: Allow multiple connections to NFSv4.x (x>0) servers Trond Myklebust
2017-05-02 16:38           ` [PATCH 3/3] pNFS: Fix a typo in pnfs_generic_alloc_ds_commits Trond Myklebust
2017-05-02 16:38             ` [RFC PATCH 4/5] pNFS: Allow multiple connections to the DS Trond Myklebust
2017-05-02 16:38               ` [RFC PATCH 5/5] NFS: Display the "nconnect" mount option if it is set Trond Myklebust
  -- strict thread matches above, loose matches on Subject: below --
2017-04-28 17:25 [RFC PATCH 0/5] Fun with the multipathing code Trond Myklebust
2017-04-28 17:25 ` [RFC PATCH 1/5] SUNRPC: Allow creation of RPC clients with multiple connections Trond Myklebust
2017-04-28 17:25   ` [RFC PATCH 2/5] NFS: Add a mount option to specify number of TCP connections to use Trond Myklebust
2017-05-04 13:45     ` Chuck Lever
2017-05-04 13:53       ` Chuck Lever
2017-05-04 16:01       ` Chuck Lever
2017-05-04 17:36         ` J. Bruce Fields
2017-05-04 17:38           ` Chuck Lever
2017-05-04 17:45             ` J. Bruce Fields
2017-05-04 18:55               ` Chuck Lever
2017-05-04 19:58                 ` J. Bruce Fields
2017-05-04 20:40               ` Trond Myklebust
2017-05-04 20:42                 ` bfields

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).