linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
@ 2020-11-10 23:18 trondmy
  2020-11-10 23:18 ` [PATCH v3 01/11] SUNRPC: xprt_load_transport() needs to support the netid "rdma6" trondmy
  2020-11-10 23:42 ` [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels Trond Myklebust
  0 siblings, 2 replies; 23+ messages in thread
From: trondmy @ 2020-11-10 23:18 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Add support for connecting to the pNFS files/flexfiles data servers
through RDMA, assuming that the GETDEVICEINFO call advertises that
support.

v2: Fix layoutstats encoding for pNFS/flexfiles.
v3: Move most of the netid handling into the SUNRPC and RDMA modules.
    Fix up the mount code to benefit more from automated loading of
    SUNRPC transport modules.

Trond Myklebust (11):
  SUNRPC: xprt_load_transport() needs to support the netid "rdma6"
  SUNRPC: Close a race with transport setup and module put
  SUNRPC: Add a helper to return the transport identifier given a netid
  NFS: Switch mount code to use xprt_find_transport_ident()
  SUNRPC: Remove unused function xprt_load_transport()
  NFSv4/pNFS: Use connections to a DS that are all of the same protocol
    family
  pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr
  NFSv4/pNFS: Store the transport type in struct nfs4_pnfs_ds_addr
  pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports
  SUNRPC: Fix up open coded kmemdup_nul()
  pNFS: Clean up open coded xdr string decoding

 fs/nfs/flexfilelayout/flexfilelayout.c |   9 +-
 fs/nfs/fs_context.c                    |  21 +++--
 fs/nfs/pnfs.h                          |   2 +
 fs/nfs/pnfs_nfs.c                      | 103 ++++++++++------------
 include/linux/sunrpc/xprt.h            |   3 +-
 net/sunrpc/xdr.c                       |   4 +-
 net/sunrpc/xprt.c                      | 117 ++++++++++++++++++-------
 net/sunrpc/xprtrdma/module.c           |   1 +
 net/sunrpc/xprtrdma/transport.c        |   1 +
 net/sunrpc/xprtsock.c                  |   4 +
 10 files changed, 159 insertions(+), 106 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v3 01/11] SUNRPC: xprt_load_transport() needs to support the netid "rdma6"
  2020-11-10 23:18 [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels trondmy
@ 2020-11-10 23:18 ` trondmy
  2020-11-10 23:18   ` [PATCH v3 02/11] SUNRPC: Close a race with transport setup and module put trondmy
  2020-11-10 23:42 ` [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels Trond Myklebust
  1 sibling, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:18 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

According to RFC5666, the correct netid for an IPv6 addressed RDMA
transport is "rdma6", which we've supported as a mount option since
Linux-4.7. The problem is when we try to load the module "xprtrdma6",
that will fail, since there is no modulealias of that name.

Fixes: 181342c5ebe8 ("xprtrdma: Add rdma6 option to support NFS/RDMA IPv6")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h     |  1 +
 net/sunrpc/xprt.c               | 65 +++++++++++++++++++++++++--------
 net/sunrpc/xprtrdma/module.c    |  1 +
 net/sunrpc/xprtrdma/transport.c |  1 +
 net/sunrpc/xprtsock.c           |  4 ++
 5 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a603d48d2b2c..3ac5037d1c3d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -330,6 +330,7 @@ struct xprt_class {
 	struct rpc_xprt *	(*setup)(struct xprt_create *);
 	struct module		*owner;
 	char			name[32];
+	const char *		netid[];
 };
 
 /*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f6c17e75f20e..1660a4b03352 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -151,31 +151,64 @@ int xprt_unregister_transport(struct xprt_class *transport)
 }
 EXPORT_SYMBOL_GPL(xprt_unregister_transport);
 
+static void
+xprt_class_release(const struct xprt_class *t)
+{
+	module_put(t->owner);
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid_locked(const char *netid)
+{
+	const struct xprt_class *t;
+	unsigned int i;
+
+	list_for_each_entry(t, &xprt_list, list) {
+		for (i = 0; *t->netid[i] != '\0'; i++) {
+			if (strcmp(t->netid[i], netid) != 0)
+				continue;
+			if (!try_module_get(t->owner))
+				continue;
+			return t;
+		}
+	}
+	return NULL;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid(const char *netid)
+{
+	const struct xprt_class *t;
+
+	spin_lock(&xprt_list_lock);
+	t = xprt_class_find_by_netid_locked(netid);
+	if (!t) {
+		spin_unlock(&xprt_list_lock);
+		request_module("rpc%s", netid);
+		spin_lock(&xprt_list_lock);
+		t = xprt_class_find_by_netid_locked(netid);
+	}
+	spin_unlock(&xprt_list_lock);
+	return t;
+}
+
 /**
  * xprt_load_transport - load a transport implementation
- * @transport_name: transport to load
+ * @netid: transport to load
  *
  * Returns:
  * 0:		transport successfully loaded
  * -ENOENT:	transport module not available
  */
-int xprt_load_transport(const char *transport_name)
+int xprt_load_transport(const char *netid)
 {
-	struct xprt_class *t;
-	int result;
+	const struct xprt_class *t;
 
-	result = 0;
-	spin_lock(&xprt_list_lock);
-	list_for_each_entry(t, &xprt_list, list) {
-		if (strcmp(t->name, transport_name) == 0) {
-			spin_unlock(&xprt_list_lock);
-			goto out;
-		}
-	}
-	spin_unlock(&xprt_list_lock);
-	result = request_module("xprt%s", transport_name);
-out:
-	return result;
+	t = xprt_class_find_by_netid(netid);
+	if (!t)
+		return -ENOENT;
+	xprt_class_release(t);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_load_transport);
 
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 620327c01302..45c5b41ac8dc 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -24,6 +24,7 @@ MODULE_DESCRIPTION("RPC/RDMA Transport");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS("svcrdma");
 MODULE_ALIAS("xprtrdma");
+MODULE_ALIAS("rpcrdma6");
 
 static void __exit rpc_rdma_cleanup(void)
 {
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8915e42240d3..035060c05fd5 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -768,6 +768,7 @@ static struct xprt_class xprt_rdma = {
 	.owner			= THIS_MODULE,
 	.ident			= XPRT_TRANSPORT_RDMA,
 	.setup			= xprt_setup_rdma,
+	.netid			= { "rdma", "rdma6", "" },
 };
 
 void xprt_rdma_cleanup(void)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7090bbee0ec5..c93ff70da3f9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -3059,6 +3059,7 @@ static struct xprt_class	xs_local_transport = {
 	.owner		= THIS_MODULE,
 	.ident		= XPRT_TRANSPORT_LOCAL,
 	.setup		= xs_setup_local,
+	.netid		= { "" },
 };
 
 static struct xprt_class	xs_udp_transport = {
@@ -3067,6 +3068,7 @@ static struct xprt_class	xs_udp_transport = {
 	.owner		= THIS_MODULE,
 	.ident		= XPRT_TRANSPORT_UDP,
 	.setup		= xs_setup_udp,
+	.netid		= { "udp", "udp6", "" },
 };
 
 static struct xprt_class	xs_tcp_transport = {
@@ -3075,6 +3077,7 @@ static struct xprt_class	xs_tcp_transport = {
 	.owner		= THIS_MODULE,
 	.ident		= XPRT_TRANSPORT_TCP,
 	.setup		= xs_setup_tcp,
+	.netid		= { "tcp", "tcp6", "" },
 };
 
 static struct xprt_class	xs_bc_tcp_transport = {
@@ -3083,6 +3086,7 @@ static struct xprt_class	xs_bc_tcp_transport = {
 	.owner		= THIS_MODULE,
 	.ident		= XPRT_TRANSPORT_BC_TCP,
 	.setup		= xs_setup_bc_tcp,
+	.netid		= { "" },
 };
 
 /**
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 02/11] SUNRPC: Close a race with transport setup and module put
  2020-11-10 23:18 ` [PATCH v3 01/11] SUNRPC: xprt_load_transport() needs to support the netid "rdma6" trondmy
@ 2020-11-10 23:18   ` trondmy
  2020-11-10 23:18     ` [PATCH v3 03/11] SUNRPC: Add a helper to return the transport identifier given a netid trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:18 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

After we've looked up the transport module, we need to ensure it can't
go away until we've finished running the transport setup code.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1660a4b03352..29de33ea53d6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -157,6 +157,32 @@ xprt_class_release(const struct xprt_class *t)
 	module_put(t->owner);
 }
 
+static const struct xprt_class *
+xprt_class_find_by_ident_locked(int ident)
+{
+	const struct xprt_class *t;
+
+	list_for_each_entry(t, &xprt_list, list) {
+		if (t->ident != ident)
+			continue;
+		if (!try_module_get(t->owner))
+			continue;
+		return t;
+	}
+	return NULL;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_ident(int ident)
+{
+	const struct xprt_class *t;
+
+	spin_lock(&xprt_list_lock);
+	t = xprt_class_find_by_ident_locked(ident);
+	spin_unlock(&xprt_list_lock);
+	return t;
+}
+
 static const struct xprt_class *
 xprt_class_find_by_netid_locked(const char *netid)
 {
@@ -1929,21 +1955,17 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
 	struct rpc_xprt	*xprt;
-	struct xprt_class *t;
+	const struct xprt_class *t;
 
-	spin_lock(&xprt_list_lock);
-	list_for_each_entry(t, &xprt_list, list) {
-		if (t->ident == args->ident) {
-			spin_unlock(&xprt_list_lock);
-			goto found;
-		}
+	t = xprt_class_find_by_ident(args->ident);
+	if (!t) {
+		dprintk("RPC: transport (%d) not supported\n", args->ident);
+		return ERR_PTR(-EIO);
 	}
-	spin_unlock(&xprt_list_lock);
-	dprintk("RPC: transport (%d) not supported\n", args->ident);
-	return ERR_PTR(-EIO);
 
-found:
 	xprt = t->setup(args);
+	xprt_class_release(t);
+
 	if (IS_ERR(xprt))
 		goto out;
 	if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 03/11] SUNRPC: Add a helper to return the transport identifier given a netid
  2020-11-10 23:18   ` [PATCH v3 02/11] SUNRPC: Close a race with transport setup and module put trondmy
@ 2020-11-10 23:18     ` trondmy
  2020-11-10 23:18       ` [PATCH v3 04/11] NFS: Switch mount code to use xprt_find_transport_ident() trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:18 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/xprt.c           | 25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3ac5037d1c3d..f7b75c72f80e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -386,6 +386,7 @@ xprt_disable_swap(struct rpc_xprt *xprt)
 int			xprt_register_transport(struct xprt_class *type);
 int			xprt_unregister_transport(struct xprt_class *type);
 int			xprt_load_transport(const char *);
+int			xprt_find_transport_ident(const char *);
 void			xprt_wait_for_reply_request_def(struct rpc_task *task);
 void			xprt_wait_for_reply_request_rtt(struct rpc_task *task);
 void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 29de33ea53d6..1016265d5e53 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -219,22 +219,39 @@ xprt_class_find_by_netid(const char *netid)
 }
 
 /**
- * xprt_load_transport - load a transport implementation
+ * xprt_find_transport_ident - convert a netid into a transport identifier
  * @netid: transport to load
  *
  * Returns:
- * 0:		transport successfully loaded
+ * > 0:		transport identifier
  * -ENOENT:	transport module not available
  */
-int xprt_load_transport(const char *netid)
+int xprt_find_transport_ident(const char *netid)
 {
 	const struct xprt_class *t;
+	int ret;
 
 	t = xprt_class_find_by_netid(netid);
 	if (!t)
 		return -ENOENT;
+	ret = t->ident;
 	xprt_class_release(t);
-	return 0;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_find_transport_ident);
+
+/**
+ * xprt_load_transport - load a transport implementation
+ * @netid: transport to load
+ *
+ * Returns:
+ * 0:		transport successfully loaded
+ * -ENOENT:	transport module not available
+ */
+int xprt_load_transport(const char *netid)
+{
+	int ret = xprt_find_transport_ident(netid);
+	return ret < 0 ? ret : 0;
 }
 EXPORT_SYMBOL_GPL(xprt_load_transport);
 
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 04/11] NFS: Switch mount code to use xprt_find_transport_ident()
  2020-11-10 23:18     ` [PATCH v3 03/11] SUNRPC: Add a helper to return the transport identifier given a netid trondmy
@ 2020-11-10 23:18       ` trondmy
  2020-11-10 23:19         ` [PATCH v3 05/11] SUNRPC: Remove unused function xprt_load_transport() trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:18 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Switch the mount code to use xprt_find_transport_ident() and to check
the results before allowing the mount to proceed.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/fs_context.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 29ec8b09a52d..06894bcdea2d 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 		ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 		break;
 	case Opt_tcp:
-		ctx->flags |= NFS_MOUNT_TCP;
-		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-		break;
 	case Opt_rdma:
 		ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
-		ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-		xprt_load_transport(param->key);
+		ret = xprt_find_transport_ident(param->key);
+		if (ret < 0)
+			goto out_bad_transport;
+		ctx->nfs_server.protocol = ret;
 		break;
 	case Opt_acl:
 		if (result.negated)
@@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 		case Opt_xprt_rdma:
 			/* vector side protocols to TCP */
 			ctx->flags |= NFS_MOUNT_TCP;
-			ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-			xprt_load_transport(param->string);
+			ret = xprt_find_transport_ident(param->string);
+			if (ret < 0)
+				goto out_bad_transport;
+			ctx->nfs_server.protocol = ret;
 			break;
 		default:
-			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+			goto out_bad_transport;
 		}
 
 		ctx->protofamily = protofamily;
@@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 			break;
 		case Opt_xprt_rdma: /* not used for side protocols */
 		default:
-			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+			goto out_bad_transport;
 		}
 		ctx->mountfamily = mountfamily;
 		break;
@@ -787,6 +788,8 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 	return nfs_invalf(fc, "NFS: Bad IP address specified");
 out_of_bounds:
 	return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+out_bad_transport:
+	return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
 }
 
 /*
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 05/11] SUNRPC: Remove unused function xprt_load_transport()
  2020-11-10 23:18       ` [PATCH v3 04/11] NFS: Switch mount code to use xprt_find_transport_ident() trondmy
@ 2020-11-10 23:19         ` trondmy
  2020-11-10 23:19           ` [PATCH v3 06/11] NFSv4/pNFS: Use connections to a DS that are all of the same protocol family trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 -
 net/sunrpc/xprt.c           | 15 ---------------
 2 files changed, 16 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f7b75c72f80e..d2e97ee802af 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -385,7 +385,6 @@ xprt_disable_swap(struct rpc_xprt *xprt)
  */
 int			xprt_register_transport(struct xprt_class *type);
 int			xprt_unregister_transport(struct xprt_class *type);
-int			xprt_load_transport(const char *);
 int			xprt_find_transport_ident(const char *);
 void			xprt_wait_for_reply_request_def(struct rpc_task *task);
 void			xprt_wait_for_reply_request_rtt(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1016265d5e53..9018d73575d6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -240,21 +240,6 @@ int xprt_find_transport_ident(const char *netid)
 }
 EXPORT_SYMBOL_GPL(xprt_find_transport_ident);
 
-/**
- * xprt_load_transport - load a transport implementation
- * @netid: transport to load
- *
- * Returns:
- * 0:		transport successfully loaded
- * -ENOENT:	transport module not available
- */
-int xprt_load_transport(const char *netid)
-{
-	int ret = xprt_find_transport_ident(netid);
-	return ret < 0 ? ret : 0;
-}
-EXPORT_SYMBOL_GPL(xprt_load_transport);
-
 static void xprt_clear_locked(struct rpc_xprt *xprt)
 {
 	xprt->snd_task = NULL;
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 06/11] NFSv4/pNFS: Use connections to a DS that are all of the same protocol family
  2020-11-10 23:19         ` [PATCH v3 05/11] SUNRPC: Remove unused function xprt_load_transport() trondmy
@ 2020-11-10 23:19           ` trondmy
  2020-11-10 23:19             ` [PATCH v3 07/11] pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

If the pNFS metadata server advertises multiple addresses for the same
data server, we should try to connect to just one protocol family and
transport type on the assumption that homogeneity will improve performance.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs_nfs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 679767ac258d..7027dac41cc7 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -860,6 +860,9 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 				.addrlen = da->da_addrlen,
 				.servername = clp->cl_hostname,
 			};
+
+			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+				continue;
 			/* Add this address as an alias */
 			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 					rpc_clnt_test_and_add_xprt, NULL);
@@ -920,6 +923,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 				.data = &xprtdata,
 			};
 
+			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+				continue;
 			/**
 			* Test this address for session trunking and
 			* add as an alias
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 07/11] pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr
  2020-11-10 23:19           ` [PATCH v3 06/11] NFSv4/pNFS: Use connections to a DS that are all of the same protocol family trondmy
@ 2020-11-10 23:19             ` trondmy
  2020-11-10 23:19               ` [PATCH v3 08/11] NFSv4/pNFS: Store the transport type in " trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs_nfs.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7027dac41cc7..8c5921be41f8 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -661,6 +661,20 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
 	return NULL;
 }
 
+static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
+{
+	struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
+	if (da)
+		INIT_LIST_HEAD(&da->da_node);
+	return da;
+}
+
+static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
+{
+	kfree(da->da_remotestr);
+	kfree(da);
+}
+
 static void destroy_ds(struct nfs4_pnfs_ds *ds)
 {
 	struct nfs4_pnfs_ds_addr *da;
@@ -676,8 +690,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
 				      struct nfs4_pnfs_ds_addr,
 				      da_node);
 		list_del_init(&da->da_node);
-		kfree(da->da_remotestr);
-		kfree(da);
+		nfs4_pnfs_ds_addr_free(da);
 	}
 
 	kfree(ds->ds_remotestr);
@@ -1094,12 +1107,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	}
 	*portstr = '\0';
 
-	da = kzalloc(sizeof(*da), gfp_flags);
+	da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
 	if (unlikely(!da))
 		goto out_free_buf;
 
-	INIT_LIST_HEAD(&da->da_node);
-
 	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 		      sizeof(da->da_addr))) {
 		dprintk("%s: error parsing address %s\n", __func__, buf);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 08/11] NFSv4/pNFS: Store the transport type in struct nfs4_pnfs_ds_addr
  2020-11-10 23:19             ` [PATCH v3 07/11] pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr trondmy
@ 2020-11-10 23:19               ` trondmy
  2020-11-10 23:19                 ` [PATCH v3 09/11] pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

We want to enable RDMA and UDP as valid transport methods if a
GETDEVICEINFO call specifies it. Do so by adding a parser for the
netid that translates it to an appropriate argument for the RPC
transport layer.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs.h     |  2 ++
 fs/nfs/pnfs_nfs.c | 34 +++++++++++++++++++---------------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2661c44c62db..f618c49697bb 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr {
 	size_t			da_addrlen;
 	struct list_head	da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
 	char			*da_remotestr;	/* human readable addr+port */
+	const char		*da_netid;
+	int			da_transport;
 };
 
 struct nfs4_pnfs_ds {
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 8c5921be41f8..b96ecb395310 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -672,6 +672,7 @@ static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
 static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
 {
 	kfree(da->da_remotestr);
+	kfree(da->da_netid);
 	kfree(da);
 }
 
@@ -867,13 +868,15 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 
 		if (!IS_ERR(clp)) {
 			struct xprt_create xprt_args = {
-				.ident = XPRT_TRANSPORT_TCP,
+				.ident = da->da_transport,
 				.net = clp->cl_net,
 				.dstaddr = (struct sockaddr *)&da->da_addr,
 				.addrlen = da->da_addrlen,
 				.servername = clp->cl_hostname,
 			};
 
+			if (da->da_transport != clp->cl_proto)
+				continue;
 			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
 				continue;
 			/* Add this address as an alias */
@@ -883,7 +886,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 		}
 		clp = get_v3_ds_connect(mds_srv,
 				(struct sockaddr *)&da->da_addr,
-				da->da_addrlen, IPPROTO_TCP,
+				da->da_addrlen, da->da_transport,
 				timeo, retrans);
 		if (IS_ERR(clp))
 			continue;
@@ -921,7 +924,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 
 		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
 			struct xprt_create xprt_args = {
-				.ident = XPRT_TRANSPORT_TCP,
+				.ident = da->da_transport,
 				.net = clp->cl_net,
 				.dstaddr = (struct sockaddr *)&da->da_addr,
 				.addrlen = da->da_addrlen,
@@ -936,6 +939,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 				.data = &xprtdata,
 			};
 
+			if (da->da_transport != clp->cl_proto)
+				continue;
 			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
 				continue;
 			/**
@@ -950,8 +955,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 		} else {
 			clp = nfs4_set_ds_client(mds_srv,
 						(struct sockaddr *)&da->da_addr,
-						da->da_addrlen, IPPROTO_TCP,
-						timeo, retrans, minor_version);
+						da->da_addrlen,
+						da->da_transport, timeo,
+						retrans, minor_version);
 			if (IS_ERR(clp))
 				continue;
 
@@ -1042,8 +1048,8 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	int nlen, rlen;
 	int tmp[2];
 	__be32 *p;
-	char *netid, *match_netid;
-	size_t len, match_netid_len;
+	char *netid;
+	size_t len;
 	char *startsep = "";
 	char *endsep = "";
 
@@ -1125,15 +1131,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	case AF_INET:
 		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 		da->da_addrlen = sizeof(struct sockaddr_in);
-		match_netid = "tcp";
-		match_netid_len = 3;
 		break;
 
 	case AF_INET6:
 		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 		da->da_addrlen = sizeof(struct sockaddr_in6);
-		match_netid = "tcp6";
-		match_netid_len = 4;
 		startsep = "[";
 		endsep = "]";
 		break;
@@ -1144,12 +1146,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 		goto out_free_da;
 	}
 
-	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
-		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
-			__func__, netid, match_netid);
+	da->da_transport = xprt_find_transport_ident(netid);
+	if (da->da_transport < 0) {
+		dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
+			__func__, netid);
 		goto out_free_da;
 	}
 
+	da->da_netid = netid;
+
 	/* save human readable address */
 	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 	da->da_remotestr = kzalloc(len, gfp_flags);
@@ -1161,7 +1166,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 
 	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 	kfree(buf);
-	kfree(netid);
 	return da;
 
 out_free_da:
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 09/11] pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports
  2020-11-10 23:19               ` [PATCH v3 08/11] NFSv4/pNFS: Store the transport type in " trondmy
@ 2020-11-10 23:19                 ` trondmy
  2020-11-10 23:19                   ` [PATCH v3 10/11] SUNRPC: Fix up open coded kmemdup_nul() trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Ensure that we report the correct netid when using UDP or RDMA
transports to the DSes.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index a163533446fa..59ae36bf5cc0 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2269,7 +2269,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 	struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
 	char portbuf[RPCBIND_MAXUADDRPLEN];
 	char addrbuf[RPCBIND_MAXUADDRLEN];
-	char *netid;
 	unsigned short port;
 	int len, netid_len;
 	__be32 *p;
@@ -2279,18 +2278,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 		if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
 			return;
 		port = ntohs(((struct sockaddr_in *)sap)->sin_port);
-		netid = "tcp";
-		netid_len = 3;
 		break;
 	case AF_INET6:
 		if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
 			return;
 		port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
-		netid = "tcp6";
-		netid_len = 4;
 		break;
 	default:
-		/* we only support tcp and tcp6 */
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -2298,8 +2292,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 	snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
 	len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
 
+	netid_len = strlen(da->da_netid);
 	p = xdr_reserve_space(xdr, 4 + netid_len);
-	xdr_encode_opaque(p, netid, netid_len);
+	xdr_encode_opaque(p, da->da_netid, netid_len);
 
 	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, addrbuf, len);
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 10/11] SUNRPC: Fix up open coded kmemdup_nul()
  2020-11-10 23:19                 ` [PATCH v3 09/11] pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports trondmy
@ 2020-11-10 23:19                   ` trondmy
  2020-11-10 23:19                     ` [PATCH v3 11/11] pNFS: Clean up open coded xdr string decoding trondmy
  0 siblings, 1 reply; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xdr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 71e03b930b70..b1cda6d85ded 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1942,10 +1942,8 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
 
 	ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
 	if (ret > 0) {
-		char *s = kmalloc(ret + 1, gfp_flags);
+		char *s = kmemdup_nul(p, ret, gfp_flags);
 		if (s != NULL) {
-			memcpy(s, p, ret);
-			s[ret] = '\0';
 			*str = s;
 			return strlen(s);
 		}
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 11/11] pNFS: Clean up open coded xdr string decoding
  2020-11-10 23:19                   ` [PATCH v3 10/11] SUNRPC: Fix up open coded kmemdup_nul() trondmy
@ 2020-11-10 23:19                     ` trondmy
  0 siblings, 0 replies; 23+ messages in thread
From: trondmy @ 2020-11-10 23:19 UTC (permalink / raw)
  To: linux-nfs

From: Trond Myklebust <trond.myklebust@hammerspace.com>

Use the existing xdr_stream_decode_string_dup() to safely decode into
kmalloced strings.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs_nfs.c | 43 +++++++------------------------------------
 1 file changed, 7 insertions(+), 36 deletions(-)

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index b96ecb395310..1babf56faf66 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -1045,9 +1045,8 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	struct nfs4_pnfs_ds_addr *da = NULL;
 	char *buf, *portstr;
 	__be16 port;
-	int nlen, rlen;
+	ssize_t nlen, rlen;
 	int tmp[2];
-	__be32 *p;
 	char *netid;
 	size_t len;
 	char *startsep = "";
@@ -1055,45 +1054,17 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 
 
 	/* r_netid */
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
+	nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
+					    gfp_flags);
+	if (unlikely(nlen < 0))
 		goto out_err;
-	nlen = be32_to_cpup(p++);
-
-	p = xdr_inline_decode(xdr, nlen);
-	if (unlikely(!p))
-		goto out_err;
-
-	netid = kmalloc(nlen+1, gfp_flags);
-	if (unlikely(!netid))
-		goto out_err;
-
-	netid[nlen] = '\0';
-	memcpy(netid, p, nlen);
 
 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_free_netid;
-	rlen = be32_to_cpup(p);
-
-	p = xdr_inline_decode(xdr, rlen);
-	if (unlikely(!p))
-		goto out_free_netid;
-
 	/* port is ".ABC.DEF", 8 chars max */
-	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
-		dprintk("%s: Invalid address, length %d\n", __func__,
-			rlen);
+	rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
+					    IPV6_SCOPE_ID_LEN + 8, gfp_flags);
+	if (unlikely(rlen < 0))
 		goto out_free_netid;
-	}
-	buf = kmalloc(rlen + 1, gfp_flags);
-	if (!buf) {
-		dprintk("%s: Not enough memory\n", __func__);
-		goto out_free_netid;
-	}
-	buf[rlen] = '\0';
-	memcpy(buf, p, rlen);
 
 	/* replace port '.' with '-' */
 	portstr = strrchr(buf, '.');
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-10 23:18 [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels trondmy
  2020-11-10 23:18 ` [PATCH v3 01/11] SUNRPC: xprt_load_transport() needs to support the netid "rdma6" trondmy
@ 2020-11-10 23:42 ` Trond Myklebust
  2020-11-13 12:48   ` Mkrtchyan, Tigran
  1 sibling, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2020-11-10 23:42 UTC (permalink / raw)
  To: linux-nfs

On Tue, 2020-11-10 at 18:18 -0500, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> Add support for connecting to the pNFS files/flexfiles data servers
> through RDMA, assuming that the GETDEVICEINFO call advertises that
> support.
> 
> v2: Fix layoutstats encoding for pNFS/flexfiles.
> v3: Move most of the netid handling into the SUNRPC and RDMA modules.
>     Fix up the mount code to benefit more from automated loading of
>     SUNRPC transport modules.
> 

Note that one cleanup that I did not perform, but which really could be
useful should we want to add more transport mechanisms, is to move the
code to parse stringified addresses (in particular IETF style
"universal addresses") into the transport modules so that the actual
parsing of mount and pNFS transport info can be automatically extended
when new transport modules are added.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-10 23:42 ` [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels Trond Myklebust
@ 2020-11-13 12:48   ` Mkrtchyan, Tigran
  2020-11-13 21:30     ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-13 12:48 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs



Hi Trond,

whit this changes (3ee69a14f92d74ced2647140b3799511ba4f3fa5) I see an infinite loop
of LAYOUTGET->GETDEVICEINFO->LAYOUTRETURN without any attempt to connect to a DS.

This is how the response to LAYOUTGET looks like.

Network File System, Ops(2): SEQUENCE GETDEVINFO
    [Program Version: 4]
    [V4 Procedure: COMPOUND (1)]
    Status: NFS4_OK (0)
    Tag: <EMPTY>
        length: 0
        contents: <EMPTY>
    Operations (count: 2)
        Opcode: SEQUENCE (53)
        Opcode: GETDEVINFO (47)
            Status: NFS4_OK (0)
            layout type: LAYOUT4_FLEX_FILES (4)
            r_netid: tcp
                length: 3
                contents: tcp
                fill bytes: opaque data
            r_addr: 131.169.191.143.125.49
                length: 22
                contents: 131.169.191.143.125.49
                fill bytes: opaque data
            version: 4
            minorversion: 1
            max_rsize: 1048576
            max_wsize: 1048576
            tightly coupled: Yes
            notify_mask: 0x00000006 (Change, Delete)
                notify_type: Change (1)
                notify_type: Delete (2)
    [Main Opcode: GETDEVINFO (47)]



The MDS is mounted with IPv4. I can provide the full packet trace, if needed.


Regards,
   Tigran.


----- Original Message -----
> From: "trondmy" <trondmy@hammerspace.com>
> To: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Wednesday, 11 November, 2020 00:42:31
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> On Tue, 2020-11-10 at 18:18 -0500, trondmy@kernel.org wrote:
>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>> 
>> Add support for connecting to the pNFS files/flexfiles data servers
>> through RDMA, assuming that the GETDEVICEINFO call advertises that
>> support.
>> 
>> v2: Fix layoutstats encoding for pNFS/flexfiles.
>> v3: Move most of the netid handling into the SUNRPC and RDMA modules.
>>     Fix up the mount code to benefit more from automated loading of
>>     SUNRPC transport modules.
>> 
> 
> Note that one cleanup that I did not perform, but which really could be
> useful should we want to add more transport mechanisms, is to move the
> code to parse stringified addresses (in particular IETF style
> "universal addresses") into the transport modules so that the actual
> parsing of mount and pNFS transport info can be automatically extended
> when new transport modules are added.
> 
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-13 12:48   ` Mkrtchyan, Tigran
@ 2020-11-13 21:30     ` Mkrtchyan, Tigran
  2020-11-13 22:45       ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-13 21:30 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs


After more testing, it looks like that client doesn't like
notification bitmap:


[31576.789492] --> _nfs4_proc_getdeviceinfo
[31576.789503] --> nfs41_call_sync_prepare data->seq_server 000000001d17c43e
[31576.789507] --> nfs4_alloc_slot used_slots=0000 highest_used=4294967295 max_slots=16
[31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0 slotid=0
[31576.789527] encode_sequence: sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0 max_slotid=0 cache_this=0
[31576.789991] decode_getdeviceinfo: unsupported notification
[31576.790003] --> nfs4_alloc_slot used_slots=0001 highest_used=0 max_slots=16
[31576.790007] <-- nfs4_alloc_slot used_slots=0003 highest_used=1 slotid=1
[31576.790010] nfs4_free_slot: slotid 1 highest_used_slotid 0
[31576.790013] nfs41_sequence_process: Error 0 free the slot
[31576.790017] nfs4_free_slot: slotid 0 highest_used_slotid 4294967295
[31576.790030] <-- _nfs4_proc_getdeviceinfo status=-5
[31576.790034] nfs4_get_device_info getdevice info returns -5
[31576.790084] <-- nfs4_get_device_info d 0000000000000000


Tigran.


----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "trondmy" <trondmy@hammerspace.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Friday, 13 November, 2020 13:48:07
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> Hi Trond,
> 
> whit this changes (3ee69a14f92d74ced2647140b3799511ba4f3fa5) I see an infinite
> loop
> of LAYOUTGET->GETDEVICEINFO->LAYOUTRETURN without any attempt to connect to a
> DS.
> 
> This is how the response to LAYOUTGET looks like.
> 
> Network File System, Ops(2): SEQUENCE GETDEVINFO
>    [Program Version: 4]
>    [V4 Procedure: COMPOUND (1)]
>    Status: NFS4_OK (0)
>    Tag: <EMPTY>
>        length: 0
>        contents: <EMPTY>
>    Operations (count: 2)
>        Opcode: SEQUENCE (53)
>        Opcode: GETDEVINFO (47)
>            Status: NFS4_OK (0)
>            layout type: LAYOUT4_FLEX_FILES (4)
>            r_netid: tcp
>                length: 3
>                contents: tcp
>                fill bytes: opaque data
>            r_addr: 131.169.191.143.125.49
>                length: 22
>                contents: 131.169.191.143.125.49
>                fill bytes: opaque data
>            version: 4
>            minorversion: 1
>            max_rsize: 1048576
>            max_wsize: 1048576
>            tightly coupled: Yes
>            notify_mask: 0x00000006 (Change, Delete)
>                notify_type: Change (1)
>                notify_type: Delete (2)
>    [Main Opcode: GETDEVINFO (47)]
> 
> 
> 
> The MDS is mounted with IPv4. I can provide the full packet trace, if needed.
> 
> 
> Regards,
>   Tigran.
> 
> 
> ----- Original Message -----
>> From: "trondmy" <trondmy@hammerspace.com>
>> To: "linux-nfs" <linux-nfs@vger.kernel.org>
>> Sent: Wednesday, 11 November, 2020 00:42:31
>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>> channels
> 
>> On Tue, 2020-11-10 at 18:18 -0500, trondmy@kernel.org wrote:
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> 
>>> Add support for connecting to the pNFS files/flexfiles data servers
>>> through RDMA, assuming that the GETDEVICEINFO call advertises that
>>> support.
>>> 
>>> v2: Fix layoutstats encoding for pNFS/flexfiles.
>>> v3: Move most of the netid handling into the SUNRPC and RDMA modules.
>>>     Fix up the mount code to benefit more from automated loading of
>>>     SUNRPC transport modules.
>>> 
>> 
>> Note that one cleanup that I did not perform, but which really could be
>> useful should we want to add more transport mechanisms, is to move the
>> code to parse stringified addresses (in particular IETF style
>> "universal addresses") into the transport modules so that the actual
>> parsing of mount and pNFS transport info can be automatically extended
>> when new transport modules are added.
>> 
>> --
>> Trond Myklebust
>> Linux NFS client maintainer, Hammerspace
> > trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-13 21:30     ` Mkrtchyan, Tigran
@ 2020-11-13 22:45       ` Trond Myklebust
  2020-11-13 23:46         ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2020-11-13 22:45 UTC (permalink / raw)
  To: tigran.mkrtchyan; +Cc: linux-nfs

On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
> 
> After more testing, it looks like that client doesn't like
> notification bitmap:
> 
> 
> [31576.789492] --> _nfs4_proc_getdeviceinfo
> [31576.789503] --> nfs41_call_sync_prepare data->seq_server
> 000000001d17c43e
> [31576.789507] --> nfs4_alloc_slot used_slots=0000
> highest_used=4294967295 max_slots=16
> [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
> slotid=0
> [31576.789527] encode_sequence:
> sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
> max_slotid=0 cache_this=0
> [31576.789991] decode_getdeviceinfo: unsupported notification

According to this, you appear to be returning a deviceinfo bitmap with
at least one non-zero entry that is not in the first 32-bit word. We
only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-zero
entries.

> [31576.790003] --> nfs4_alloc_slot used_slots=0001 highest_used=0
> max_slots=16
> [31576.790007] <-- nfs4_alloc_slot used_slots=0003 highest_used=1
> slotid=1
> [31576.790010] nfs4_free_slot: slotid 1 highest_used_slotid 0
> [31576.790013] nfs41_sequence_process: Error 0 free the slot
> [31576.790017] nfs4_free_slot: slotid 0 highest_used_slotid
> 4294967295
> [31576.790030] <-- _nfs4_proc_getdeviceinfo status=-5
> [31576.790034] nfs4_get_device_info getdevice info returns -5
> [31576.790084] <-- nfs4_get_device_info d 0000000000000000
> 
> 
> Tigran.
> 
> 
> ----- Original Message -----
> > From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> > To: "trondmy" <trondmy@hammerspace.com>
> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> > Sent: Friday, 13 November, 2020 13:48:07
> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
> > file+flexfiles data channels
> 
> > Hi Trond,
> > 
> > whit this changes (3ee69a14f92d74ced2647140b3799511ba4f3fa5) I see
> > an infinite
> > loop
> > of LAYOUTGET->GETDEVICEINFO->LAYOUTRETURN without any attempt to
> > connect to a
> > DS.
> > 
> > This is how the response to LAYOUTGET looks like.
> > 
> > Network File System, Ops(2): SEQUENCE GETDEVINFO
> >    [Program Version: 4]
> >    [V4 Procedure: COMPOUND (1)]
> >    Status: NFS4_OK (0)
> >    Tag: <EMPTY>
> >        length: 0
> >        contents: <EMPTY>
> >    Operations (count: 2)
> >        Opcode: SEQUENCE (53)
> >        Opcode: GETDEVINFO (47)
> >            Status: NFS4_OK (0)
> >            layout type: LAYOUT4_FLEX_FILES (4)
> >            r_netid: tcp
> >                length: 3
> >                contents: tcp
> >                fill bytes: opaque data
> >            r_addr: 131.169.191.143.125.49
> >                length: 22
> >                contents: 131.169.191.143.125.49
> >                fill bytes: opaque data
> >            version: 4
> >            minorversion: 1
> >            max_rsize: 1048576
> >            max_wsize: 1048576
> >            tightly coupled: Yes
> >            notify_mask: 0x00000006 (Change, Delete)
> >                notify_type: Change (1)
> >                notify_type: Delete (2)
> >    [Main Opcode: GETDEVINFO (47)]
> > 
> > 
> > 
> > The MDS is mounted with IPv4. I can provide the full packet trace,
> > if needed.
> > 
> > 
> > Regards,
> >   Tigran.
> > 
> > 
> > ----- Original Message -----
> > > From: "trondmy" <trondmy@hammerspace.com>
> > > To: "linux-nfs" <linux-nfs@vger.kernel.org>
> > > Sent: Wednesday, 11 November, 2020 00:42:31
> > > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
> > > file+flexfiles data
> > > channels
> > 
> > > On Tue, 2020-11-10 at 18:18 -0500, trondmy@kernel.org wrote:
> > > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > > 
> > > > Add support for connecting to the pNFS files/flexfiles data
> > > > servers
> > > > through RDMA, assuming that the GETDEVICEINFO call advertises
> > > > that
> > > > support.
> > > > 
> > > > v2: Fix layoutstats encoding for pNFS/flexfiles.
> > > > v3: Move most of the netid handling into the SUNRPC and RDMA
> > > > modules.
> > > >     Fix up the mount code to benefit more from automated
> > > > loading of
> > > >     SUNRPC transport modules.
> > > > 
> > > 
> > > Note that one cleanup that I did not perform, but which really
> > > could be
> > > useful should we want to add more transport mechanisms, is to
> > > move the
> > > code to parse stringified addresses (in particular IETF style
> > > "universal addresses") into the transport modules so that the
> > > actual
> > > parsing of mount and pNFS transport info can be automatically
> > > extended
> > > when new transport modules are added.
> > > 
> > > --
> > > Trond Myklebust
> > > Linux NFS client maintainer, Hammerspace
> > > trond.myklebust@hammerspace.com

-- 
Trond Myklebust
CTO, Hammerspace Inc
4984 El Camino Real, Suite 208
Los Altos, CA 94022
​
www.hammer.space


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-13 22:45       ` Trond Myklebust
@ 2020-11-13 23:46         ` Mkrtchyan, Tigran
  2020-11-14 14:29           ` Trond Myklebust
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-13 23:46 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs



----- Original Message -----
> From: "trondmy" <trondmy@hammerspace.com>
> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Friday, 13 November, 2020 23:45:00
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>> 
>> After more testing, it looks like that client doesn't like
>> notification bitmap:
>> 
>> 
>> [31576.789492] --> _nfs4_proc_getdeviceinfo
>> [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>> 000000001d17c43e
>> [31576.789507] --> nfs4_alloc_slot used_slots=0000
>> highest_used=4294967295 max_slots=16
>> [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>> slotid=0
>> [31576.789527] encode_sequence:
>> sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>> max_slotid=0 cache_this=0
>> [31576.789991] decode_getdeviceinfo: unsupported notification
> 
> According to this, you appear to be returning a deviceinfo bitmap with
> at least one non-zero entry that is not in the first 32-bit word. We
> only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
> NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-zero
> entries.


according to packet capture only bitmap[0] has non zero bits set.
This is the reply of compound starting from nfs staus code, tag
length and so on.


0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
0090   00 00 00 00


the last 12 bytes : bitmap size, bitmap[0], bitmap[1]


This part of code in the didn't change since 2010, and I
have no issues to use 5.8 kernel. I am pretty sure, that
tests with 5.9 did pass as expected. I will try to bisec it.

Tigran.

> 
>> [31576.790003] --> nfs4_alloc_slot used_slots=0001 highest_used=0
>> max_slots=16
>> [31576.790007] <-- nfs4_alloc_slot used_slots=0003 highest_used=1
>> slotid=1
>> [31576.790010] nfs4_free_slot: slotid 1 highest_used_slotid 0
>> [31576.790013] nfs41_sequence_process: Error 0 free the slot
>> [31576.790017] nfs4_free_slot: slotid 0 highest_used_slotid
>> 4294967295
>> [31576.790030] <-- _nfs4_proc_getdeviceinfo status=-5
>> [31576.790034] nfs4_get_device_info getdevice info returns -5
>> [31576.790084] <-- nfs4_get_device_info d 0000000000000000
>> 
>> 
>> Tigran.
>> 
>> 
>> ----- Original Message -----
>> > From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> > To: "trondmy" <trondmy@hammerspace.com>
>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>> > Sent: Friday, 13 November, 2020 13:48:07
>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>> > file+flexfiles data channels
>> 
>> > Hi Trond,
>> > 
>> > whit this changes (3ee69a14f92d74ced2647140b3799511ba4f3fa5) I see
>> > an infinite
>> > loop
>> > of LAYOUTGET->GETDEVICEINFO->LAYOUTRETURN without any attempt to
>> > connect to a
>> > DS.
>> > 
>> > This is how the response to LAYOUTGET looks like.
>> > 
>> > Network File System, Ops(2): SEQUENCE GETDEVINFO
>> >    [Program Version: 4]
>> >    [V4 Procedure: COMPOUND (1)]
>> >    Status: NFS4_OK (0)
>> >    Tag: <EMPTY>
>> >        length: 0
>> >        contents: <EMPTY>
>> >    Operations (count: 2)
>> >        Opcode: SEQUENCE (53)
>> >        Opcode: GETDEVINFO (47)
>> >            Status: NFS4_OK (0)
>> >            layout type: LAYOUT4_FLEX_FILES (4)
>> >            r_netid: tcp
>> >                length: 3
>> >                contents: tcp
>> >                fill bytes: opaque data
>> >            r_addr: 131.169.191.143.125.49
>> >                length: 22
>> >                contents: 131.169.191.143.125.49
>> >                fill bytes: opaque data
>> >            version: 4
>> >            minorversion: 1
>> >            max_rsize: 1048576
>> >            max_wsize: 1048576
>> >            tightly coupled: Yes
>> >            notify_mask: 0x00000006 (Change, Delete)
>> >                notify_type: Change (1)
>> >                notify_type: Delete (2)
>> >    [Main Opcode: GETDEVINFO (47)]
>> > 
>> > 
>> > 
>> > The MDS is mounted with IPv4. I can provide the full packet trace,
>> > if needed.
>> > 
>> > 
>> > Regards,
>> >   Tigran.
>> > 
>> > 
>> > ----- Original Message -----
>> > > From: "trondmy" <trondmy@hammerspace.com>
>> > > To: "linux-nfs" <linux-nfs@vger.kernel.org>
>> > > Sent: Wednesday, 11 November, 2020 00:42:31
>> > > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>> > > file+flexfiles data
>> > > channels
>> > 
>> > > On Tue, 2020-11-10 at 18:18 -0500, trondmy@kernel.org wrote:
>> > > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
>> > > > 
>> > > > Add support for connecting to the pNFS files/flexfiles data
>> > > > servers
>> > > > through RDMA, assuming that the GETDEVICEINFO call advertises
>> > > > that
>> > > > support.
>> > > > 
>> > > > v2: Fix layoutstats encoding for pNFS/flexfiles.
>> > > > v3: Move most of the netid handling into the SUNRPC and RDMA
>> > > > modules.
>> > > >     Fix up the mount code to benefit more from automated
>> > > > loading of
>> > > >     SUNRPC transport modules.
>> > > > 
>> > > 
>> > > Note that one cleanup that I did not perform, but which really
>> > > could be
>> > > useful should we want to add more transport mechanisms, is to
>> > > move the
>> > > code to parse stringified addresses (in particular IETF style
>> > > "universal addresses") into the transport modules so that the
>> > > actual
>> > > parsing of mount and pNFS transport info can be automatically
>> > > extended
>> > > when new transport modules are added.
>> > > 
>> > > --
>> > > Trond Myklebust
>> > > Linux NFS client maintainer, Hammerspace
>> > > trond.myklebust@hammerspace.com
> 
> --
> Trond Myklebust
> CTO, Hammerspace Inc
> 4984 El Camino Real, Suite 208
> Los Altos, CA 94022
> 
> www.hammer.space

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-13 23:46         ` Mkrtchyan, Tigran
@ 2020-11-14 14:29           ` Trond Myklebust
  2020-11-16 20:55             ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Trond Myklebust @ 2020-11-14 14:29 UTC (permalink / raw)
  To: tigran.mkrtchyan; +Cc: linux-nfs

On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
> 
> 
> ----- Original Message -----
> > From: "trondmy" <trondmy@hammerspace.com>
> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> > Sent: Friday, 13 November, 2020 23:45:00
> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
> > file+flexfiles data channels
> 
> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
> > > 
> > > After more testing, it looks like that client doesn't like
> > > notification bitmap:
> > > 
> > > 
> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
> > > 000000001d17c43e
> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
> > > highest_used=4294967295 max_slots=16
> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
> > > slotid=0
> > > [31576.789527] encode_sequence:
> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
> > > max_slotid=0 cache_this=0
> > > [31576.789991] decode_getdeviceinfo: unsupported notification
> > 
> > According to this, you appear to be returning a deviceinfo bitmap
> > with
> > at least one non-zero entry that is not in the first 32-bit word.
> > We
> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
> > zero
> > entries.
> 
> 
> according to packet capture only bitmap[0] has non zero bits set.
> This is the reply of compound starting from nfs staus code, tag
> length and so on.
> 
> 
> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
> 0090   00 00 00 00
> 
> 
> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
> 
> 
> This part of code in the didn't change since 2010, and I
> have no issues to use 5.8 kernel. I am pretty sure, that
> tests with 5.9 did pass as expected. I will try to bisec it.

I don't think I've introduced this bug. I did not touch anything in the
getdeviceinfo proc or XDR code.
Does the following patch help?

8<-------------------------------------------------------
From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 13 Nov 2020 21:42:16 -0500
Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
 reply

We can fit the device_addr4 opaque data padding in the pages.

Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4xdr.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c6dbfcae7517..c8714381d511 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
+	uint32_t replen;
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
+
+	replen = hdr.replen + op_decode_hdr_maxsz;
+
 	encode_getdeviceinfo(xdr, args, &hdr);
 
-	/* set up reply kvec. Subtract notification bitmap max size (2)
-	 * so that notification bitmap is put in xdr_buf tail */
+	/* set up reply kvec. device_addr4 opaque data is read into the
+	 * pages */
 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
-				args->pdev->pglen, hdr.replen - 2);
+				args->pdev->pglen, replen + 2);
 	encode_nops(&hdr);
 }
 
@@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
 	 * and places the remaining xdr data in xdr_buf->tail
 	 */
 	pdev->mincount = be32_to_cpup(p);
-	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
+	/* Calculate padding */
+	len = xdr_align_size(pdev->mincount);
+	if (xdr_read_pages(xdr, len) != len)
 		return -EIO;
 
 	/* Parse notification bitmap, verifying that it is zero. */
-- 
2.28.0



-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-14 14:29           ` Trond Myklebust
@ 2020-11-16 20:55             ` Mkrtchyan, Tigran
  2020-11-17 14:50               ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-16 20:55 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs


Hi Trond,

I am afraid, that the fix didn't work. I bisecting it.... 


Tigran.


----- Original Message -----
> From: "trondmy" <trondmy@hammerspace.com>
> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Saturday, 14 November, 2020 15:29:01
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
>> 
>> 
>> ----- Original Message -----
>> > From: "trondmy" <trondmy@hammerspace.com>
>> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>> > Sent: Friday, 13 November, 2020 23:45:00
>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>> > file+flexfiles data channels
>> 
>> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>> > > 
>> > > After more testing, it looks like that client doesn't like
>> > > notification bitmap:
>> > > 
>> > > 
>> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
>> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>> > > 000000001d17c43e
>> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
>> > > highest_used=4294967295 max_slots=16
>> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>> > > slotid=0
>> > > [31576.789527] encode_sequence:
>> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>> > > max_slotid=0 cache_this=0
>> > > [31576.789991] decode_getdeviceinfo: unsupported notification
>> > 
>> > According to this, you appear to be returning a deviceinfo bitmap
>> > with
>> > at least one non-zero entry that is not in the first 32-bit word.
>> > We
>> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
>> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
>> > zero
>> > entries.
>> 
>> 
>> according to packet capture only bitmap[0] has non zero bits set.
>> This is the reply of compound starting from nfs staus code, tag
>> length and so on.
>> 
>> 
>> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
>> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
>> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
>> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
>> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
>> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
>> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
>> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
>> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
>> 0090   00 00 00 00
>> 
>> 
>> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
>> 
>> 
>> This part of code in the didn't change since 2010, and I
>> have no issues to use 5.8 kernel. I am pretty sure, that
>> tests with 5.9 did pass as expected. I will try to bisec it.
> 
> I don't think I've introduced this bug. I did not touch anything in the
> getdeviceinfo proc or XDR code.
> Does the following patch help?
> 
> 8<-------------------------------------------------------
> From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> Date: Fri, 13 Nov 2020 21:42:16 -0500
> Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
> reply
> 
> We can fit the device_addr4 opaque data padding in the pages.
> 
> Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> fs/nfs/nfs4xdr.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index c6dbfcae7517..c8714381d511 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst
> *req,
> 	struct compound_hdr hdr = {
> 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> 	};
> +	uint32_t replen;
> 
> 	encode_compound_hdr(xdr, req, &hdr);
> 	encode_sequence(xdr, &args->seq_args, &hdr);
> +
> +	replen = hdr.replen + op_decode_hdr_maxsz;
> +
> 	encode_getdeviceinfo(xdr, args, &hdr);
> 
> -	/* set up reply kvec. Subtract notification bitmap max size (2)
> -	 * so that notification bitmap is put in xdr_buf tail */
> +	/* set up reply kvec. device_addr4 opaque data is read into the
> +	 * pages */
> 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
> -				args->pdev->pglen, hdr.replen - 2);
> +				args->pdev->pglen, replen + 2);
> 	encode_nops(&hdr);
> }
> 
> @@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
> 	 * and places the remaining xdr data in xdr_buf->tail
> 	 */
> 	pdev->mincount = be32_to_cpup(p);
> -	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
> +	/* Calculate padding */
> +	len = xdr_align_size(pdev->mincount);
> +	if (xdr_read_pages(xdr, len) != len)
> 		return -EIO;
> 
> 	/* Parse notification bitmap, verifying that it is zero. */
> --
> 2.28.0
> 
> 
> 
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-16 20:55             ` Mkrtchyan, Tigran
@ 2020-11-17 14:50               ` Mkrtchyan, Tigran
  2020-11-26 17:17                 ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-17 14:50 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs, Anna Schumaker



Here is the result:


$ git bisect bad 
c567552612ece787b178e3b147b5854ad422a836 is the first bad commit
commit c567552612ece787b178e3b147b5854ad422a836
Author: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date:   Wed May 28 13:41:22 2014 -0400

    NFS: Add READ_PLUS data segment support
    
    This patch adds client support for decoding a single NFS4_CONTENT_DATA
    segment returned by the server. This is the simplest implementation
    possible, since it does not account for any hole segments in the reply.
    
    Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

 fs/nfs/nfs42xdr.c         | 141 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4client.c       |   2 +
 fs/nfs/nfs4proc.c         |  43 +++++++++++++-
 fs/nfs/nfs4xdr.c          |   1 +
 include/linux/nfs4.h      |   2 +-
 include/linux/nfs_fs_sb.h |   1 +
 include/linux/nfs_xdr.h   |   2 +-
 7 files changed, 187 insertions(+), 5 deletions(-)


Regards,
   Tigran.



----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "trondmy" <trondmy@hammerspace.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Monday, 16 November, 2020 21:55:50
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> Hi Trond,
> 
> I am afraid, that the fix didn't work. I bisecting it....
> 
> 
> Tigran.
> 
> 
> ----- Original Message -----
>> From: "trondmy" <trondmy@hammerspace.com>
>> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>> Sent: Saturday, 14 November, 2020 15:29:01
>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>> channels
> 
>> On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
>>> 
>>> 
>>> ----- Original Message -----
>>> > From: "trondmy" <trondmy@hammerspace.com>
>>> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>> > Sent: Friday, 13 November, 2020 23:45:00
>>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>>> > file+flexfiles data channels
>>> 
>>> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>>> > > 
>>> > > After more testing, it looks like that client doesn't like
>>> > > notification bitmap:
>>> > > 
>>> > > 
>>> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
>>> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>>> > > 000000001d17c43e
>>> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
>>> > > highest_used=4294967295 max_slots=16
>>> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>>> > > slotid=0
>>> > > [31576.789527] encode_sequence:
>>> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>>> > > max_slotid=0 cache_this=0
>>> > > [31576.789991] decode_getdeviceinfo: unsupported notification
>>> > 
>>> > According to this, you appear to be returning a deviceinfo bitmap
>>> > with
>>> > at least one non-zero entry that is not in the first 32-bit word.
>>> > We
>>> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
>>> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
>>> > zero
>>> > entries.
>>> 
>>> 
>>> according to packet capture only bitmap[0] has non zero bits set.
>>> This is the reply of compound starting from nfs staus code, tag
>>> length and so on.
>>> 
>>> 
>>> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
>>> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
>>> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
>>> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
>>> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
>>> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
>>> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
>>> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
>>> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
>>> 0090   00 00 00 00
>>> 
>>> 
>>> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
>>> 
>>> 
>>> This part of code in the didn't change since 2010, and I
>>> have no issues to use 5.8 kernel. I am pretty sure, that
>>> tests with 5.9 did pass as expected. I will try to bisec it.
>> 
>> I don't think I've introduced this bug. I did not touch anything in the
>> getdeviceinfo proc or XDR code.
>> Does the following patch help?
>> 
>> 8<-------------------------------------------------------
>> From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>> Date: Fri, 13 Nov 2020 21:42:16 -0500
>> Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
>> reply
>> 
>> We can fit the device_addr4 opaque data padding in the pages.
>> 
>> Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>> ---
>> fs/nfs/nfs4xdr.c | 14 ++++++++++----
>> 1 file changed, 10 insertions(+), 4 deletions(-)
>> 
>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index c6dbfcae7517..c8714381d511 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst
>> *req,
>> 	struct compound_hdr hdr = {
>> 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>> 	};
>> +	uint32_t replen;
>> 
>> 	encode_compound_hdr(xdr, req, &hdr);
>> 	encode_sequence(xdr, &args->seq_args, &hdr);
>> +
>> +	replen = hdr.replen + op_decode_hdr_maxsz;
>> +
>> 	encode_getdeviceinfo(xdr, args, &hdr);
>> 
>> -	/* set up reply kvec. Subtract notification bitmap max size (2)
>> -	 * so that notification bitmap is put in xdr_buf tail */
>> +	/* set up reply kvec. device_addr4 opaque data is read into the
>> +	 * pages */
>> 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
>> -				args->pdev->pglen, hdr.replen - 2);
>> +				args->pdev->pglen, replen + 2);
>> 	encode_nops(&hdr);
>> }
>> 
>> @@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
>> 	 * and places the remaining xdr data in xdr_buf->tail
>> 	 */
>> 	pdev->mincount = be32_to_cpup(p);
>> -	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
>> +	/* Calculate padding */
>> +	len = xdr_align_size(pdev->mincount);
>> +	if (xdr_read_pages(xdr, len) != len)
>> 		return -EIO;
>> 
>> 	/* Parse notification bitmap, verifying that it is zero. */
>> --
>> 2.28.0
>> 
>> 
>> 
>> --
>> Trond Myklebust
>> Linux NFS client maintainer, Hammerspace
> > trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-17 14:50               ` Mkrtchyan, Tigran
@ 2020-11-26 17:17                 ` Mkrtchyan, Tigran
  2020-12-01 10:59                   ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-11-26 17:17 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs, Anna Schumaker




I have added some debug info Ind got this:

decode_getdeviceinfo: layout type 4
decode_getdeviceinfo: layout size 64
decode_getdeviceinfo: layout notification bitmap size 1094994757

So it looks like that xdr_read_pages set xdr pointer to a wrong position
and next read, which is bitmap size

5857         pdev->mincount = be32_to_cpup(p);
5858         dprintk("%s: layout size %u\n", __func__, pdev->mincount);
5859         if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
5860                 return -EIO;
5861 
5862         /* Parse notification bitmap, verifying that it is zero. */
5863         p = xdr_inline_decode(xdr, 4);
5864         if (unlikely(!p))
5865                 return -EIO;
5866         len = be32_to_cpup(p);
5867         dprintk("%s: layout notification bitmap size %u\n", __func__, len);
5868         if (len) {
5869                 uint32_t i;


Tigran.


----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "trondmy" <trondmy@hammerspace.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker" <anna.schumaker@netapp.com>
> Sent: Tuesday, 17 November, 2020 15:50:57
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> Here is the result:
> 
> 
> $ git bisect bad
> c567552612ece787b178e3b147b5854ad422a836 is the first bad commit
> commit c567552612ece787b178e3b147b5854ad422a836
> Author: Anna Schumaker <Anna.Schumaker@Netapp.com>
> Date:   Wed May 28 13:41:22 2014 -0400
> 
>    NFS: Add READ_PLUS data segment support
>    
>    This patch adds client support for decoding a single NFS4_CONTENT_DATA
>    segment returned by the server. This is the simplest implementation
>    possible, since it does not account for any hole segments in the reply.
>    
>    Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> fs/nfs/nfs42xdr.c         | 141 ++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfs/nfs4client.c       |   2 +
> fs/nfs/nfs4proc.c         |  43 +++++++++++++-
> fs/nfs/nfs4xdr.c          |   1 +
> include/linux/nfs4.h      |   2 +-
> include/linux/nfs_fs_sb.h |   1 +
> include/linux/nfs_xdr.h   |   2 +-
> 7 files changed, 187 insertions(+), 5 deletions(-)
> 
> 
> Regards,
>   Tigran.
> 
> 
> 
> ----- Original Message -----
>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> To: "trondmy" <trondmy@hammerspace.com>
>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>> Sent: Monday, 16 November, 2020 21:55:50
>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>> channels
> 
>> Hi Trond,
>> 
>> I am afraid, that the fix didn't work. I bisecting it....
>> 
>> 
>> Tigran.
>> 
>> 
>> ----- Original Message -----
>>> From: "trondmy" <trondmy@hammerspace.com>
>>> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>> Sent: Saturday, 14 November, 2020 15:29:01
>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>> channels
>> 
>>> On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
>>>> 
>>>> 
>>>> ----- Original Message -----
>>>> > From: "trondmy" <trondmy@hammerspace.com>
>>>> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>> > Sent: Friday, 13 November, 2020 23:45:00
>>>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>>>> > file+flexfiles data channels
>>>> 
>>>> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>>>> > > 
>>>> > > After more testing, it looks like that client doesn't like
>>>> > > notification bitmap:
>>>> > > 
>>>> > > 
>>>> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
>>>> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>>>> > > 000000001d17c43e
>>>> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
>>>> > > highest_used=4294967295 max_slots=16
>>>> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>>>> > > slotid=0
>>>> > > [31576.789527] encode_sequence:
>>>> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>>>> > > max_slotid=0 cache_this=0
>>>> > > [31576.789991] decode_getdeviceinfo: unsupported notification
>>>> > 
>>>> > According to this, you appear to be returning a deviceinfo bitmap
>>>> > with
>>>> > at least one non-zero entry that is not in the first 32-bit word.
>>>> > We
>>>> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
>>>> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
>>>> > zero
>>>> > entries.
>>>> 
>>>> 
>>>> according to packet capture only bitmap[0] has non zero bits set.
>>>> This is the reply of compound starting from nfs staus code, tag
>>>> length and so on.
>>>> 
>>>> 
>>>> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
>>>> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
>>>> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
>>>> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
>>>> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
>>>> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
>>>> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
>>>> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
>>>> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
>>>> 0090   00 00 00 00
>>>> 
>>>> 
>>>> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
>>>> 
>>>> 
>>>> This part of code in the didn't change since 2010, and I
>>>> have no issues to use 5.8 kernel. I am pretty sure, that
>>>> tests with 5.9 did pass as expected. I will try to bisec it.
>>> 
>>> I don't think I've introduced this bug. I did not touch anything in the
>>> getdeviceinfo proc or XDR code.
>>> Does the following patch help?
>>> 
>>> 8<-------------------------------------------------------
>>> From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> Date: Fri, 13 Nov 2020 21:42:16 -0500
>>> Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
>>> reply
>>> 
>>> We can fit the device_addr4 opaque data padding in the pages.
>>> 
>>> Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> ---
>>> fs/nfs/nfs4xdr.c | 14 ++++++++++----
>>> 1 file changed, 10 insertions(+), 4 deletions(-)
>>> 
>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>> index c6dbfcae7517..c8714381d511 100644
>>> --- a/fs/nfs/nfs4xdr.c
>>> +++ b/fs/nfs/nfs4xdr.c
>>> @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst
>>> *req,
>>> 	struct compound_hdr hdr = {
>>> 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>>> 	};
>>> +	uint32_t replen;
>>> 
>>> 	encode_compound_hdr(xdr, req, &hdr);
>>> 	encode_sequence(xdr, &args->seq_args, &hdr);
>>> +
>>> +	replen = hdr.replen + op_decode_hdr_maxsz;
>>> +
>>> 	encode_getdeviceinfo(xdr, args, &hdr);
>>> 
>>> -	/* set up reply kvec. Subtract notification bitmap max size (2)
>>> -	 * so that notification bitmap is put in xdr_buf tail */
>>> +	/* set up reply kvec. device_addr4 opaque data is read into the
>>> +	 * pages */
>>> 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
>>> -				args->pdev->pglen, hdr.replen - 2);
>>> +				args->pdev->pglen, replen + 2);
>>> 	encode_nops(&hdr);
>>> }
>>> 
>>> @@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
>>> 	 * and places the remaining xdr data in xdr_buf->tail
>>> 	 */
>>> 	pdev->mincount = be32_to_cpup(p);
>>> -	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
>>> +	/* Calculate padding */
>>> +	len = xdr_align_size(pdev->mincount);
>>> +	if (xdr_read_pages(xdr, len) != len)
>>> 		return -EIO;
>>> 
>>> 	/* Parse notification bitmap, verifying that it is zero. */
>>> --
>>> 2.28.0
>>> 
>>> 
>>> 
>>> --
>>> Trond Myklebust
>>> Linux NFS client maintainer, Hammerspace
> > > trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-11-26 17:17                 ` Mkrtchyan, Tigran
@ 2020-12-01 10:59                   ` Mkrtchyan, Tigran
  2020-12-01 14:44                     ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-12-01 10:59 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs, Anna Schumaker



More investigation...

After xdr_read_pages call the xdr stream points to the
beginning of RPC package and return the xid value on
the next read (which should be the size of the notification bitmap).


Regards,
   Tigran.

----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "trondmy" <trondmy@hammerspace.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker" <anna.schumaker@netapp.com>
> Sent: Thursday, 26 November, 2020 18:17:41
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> I have added some debug info Ind got this:
> 
> decode_getdeviceinfo: layout type 4
> decode_getdeviceinfo: layout size 64
> decode_getdeviceinfo: layout notification bitmap size 1094994757
> 
> So it looks like that xdr_read_pages set xdr pointer to a wrong position
> and next read, which is bitmap size
> 
> 5857         pdev->mincount = be32_to_cpup(p);
> 5858         dprintk("%s: layout size %u\n", __func__, pdev->mincount);
> 5859         if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
> 5860                 return -EIO;
> 5861
> 5862         /* Parse notification bitmap, verifying that it is zero. */
> 5863         p = xdr_inline_decode(xdr, 4);
> 5864         if (unlikely(!p))
> 5865                 return -EIO;
> 5866         len = be32_to_cpup(p);
> 5867         dprintk("%s: layout notification bitmap size %u\n", __func__, len);
> 5868         if (len) {
> 5869                 uint32_t i;
> 
> 
> Tigran.
> 
> 
> ----- Original Message -----
>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> To: "trondmy" <trondmy@hammerspace.com>
>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker"
>> <anna.schumaker@netapp.com>
>> Sent: Tuesday, 17 November, 2020 15:50:57
>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>> channels
> 
>> Here is the result:
>> 
>> 
>> $ git bisect bad
>> c567552612ece787b178e3b147b5854ad422a836 is the first bad commit
>> commit c567552612ece787b178e3b147b5854ad422a836
>> Author: Anna Schumaker <Anna.Schumaker@Netapp.com>
>> Date:   Wed May 28 13:41:22 2014 -0400
>> 
>>    NFS: Add READ_PLUS data segment support
>>    
>>    This patch adds client support for decoding a single NFS4_CONTENT_DATA
>>    segment returned by the server. This is the simplest implementation
>>    possible, since it does not account for any hole segments in the reply.
>>    
>>    Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
>> 
>> fs/nfs/nfs42xdr.c         | 141 ++++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfs/nfs4client.c       |   2 +
>> fs/nfs/nfs4proc.c         |  43 +++++++++++++-
>> fs/nfs/nfs4xdr.c          |   1 +
>> include/linux/nfs4.h      |   2 +-
>> include/linux/nfs_fs_sb.h |   1 +
>> include/linux/nfs_xdr.h   |   2 +-
>> 7 files changed, 187 insertions(+), 5 deletions(-)
>> 
>> 
>> Regards,
>>   Tigran.
>> 
>> 
>> 
>> ----- Original Message -----
>>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>> To: "trondmy" <trondmy@hammerspace.com>
>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>> Sent: Monday, 16 November, 2020 21:55:50
>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>> channels
>> 
>>> Hi Trond,
>>> 
>>> I am afraid, that the fix didn't work. I bisecting it....
>>> 
>>> 
>>> Tigran.
>>> 
>>> 
>>> ----- Original Message -----
>>>> From: "trondmy" <trondmy@hammerspace.com>
>>>> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>> Sent: Saturday, 14 November, 2020 15:29:01
>>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>>> channels
>>> 
>>>> On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
>>>>> 
>>>>> 
>>>>> ----- Original Message -----
>>>>> > From: "trondmy" <trondmy@hammerspace.com>
>>>>> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>>> > Sent: Friday, 13 November, 2020 23:45:00
>>>>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>>>>> > file+flexfiles data channels
>>>>> 
>>>>> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>>>>> > > 
>>>>> > > After more testing, it looks like that client doesn't like
>>>>> > > notification bitmap:
>>>>> > > 
>>>>> > > 
>>>>> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
>>>>> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>>>>> > > 000000001d17c43e
>>>>> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
>>>>> > > highest_used=4294967295 max_slots=16
>>>>> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>>>>> > > slotid=0
>>>>> > > [31576.789527] encode_sequence:
>>>>> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>>>>> > > max_slotid=0 cache_this=0
>>>>> > > [31576.789991] decode_getdeviceinfo: unsupported notification
>>>>> > 
>>>>> > According to this, you appear to be returning a deviceinfo bitmap
>>>>> > with
>>>>> > at least one non-zero entry that is not in the first 32-bit word.
>>>>> > We
>>>>> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
>>>>> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
>>>>> > zero
>>>>> > entries.
>>>>> 
>>>>> 
>>>>> according to packet capture only bitmap[0] has non zero bits set.
>>>>> This is the reply of compound starting from nfs staus code, tag
>>>>> length and so on.
>>>>> 
>>>>> 
>>>>> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
>>>>> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
>>>>> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
>>>>> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
>>>>> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
>>>>> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
>>>>> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
>>>>> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
>>>>> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
>>>>> 0090   00 00 00 00
>>>>> 
>>>>> 
>>>>> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
>>>>> 
>>>>> 
>>>>> This part of code in the didn't change since 2010, and I
>>>>> have no issues to use 5.8 kernel. I am pretty sure, that
>>>>> tests with 5.9 did pass as expected. I will try to bisec it.
>>>> 
>>>> I don't think I've introduced this bug. I did not touch anything in the
>>>> getdeviceinfo proc or XDR code.
>>>> Does the following patch help?
>>>> 
>>>> 8<-------------------------------------------------------
>>>> From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
>>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>> Date: Fri, 13 Nov 2020 21:42:16 -0500
>>>> Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
>>>> reply
>>>> 
>>>> We can fit the device_addr4 opaque data padding in the pages.
>>>> 
>>>> Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
>>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>> ---
>>>> fs/nfs/nfs4xdr.c | 14 ++++++++++----
>>>> 1 file changed, 10 insertions(+), 4 deletions(-)
>>>> 
>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>> index c6dbfcae7517..c8714381d511 100644
>>>> --- a/fs/nfs/nfs4xdr.c
>>>> +++ b/fs/nfs/nfs4xdr.c
>>>> @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst
>>>> *req,
>>>> 	struct compound_hdr hdr = {
>>>> 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>>>> 	};
>>>> +	uint32_t replen;
>>>> 
>>>> 	encode_compound_hdr(xdr, req, &hdr);
>>>> 	encode_sequence(xdr, &args->seq_args, &hdr);
>>>> +
>>>> +	replen = hdr.replen + op_decode_hdr_maxsz;
>>>> +
>>>> 	encode_getdeviceinfo(xdr, args, &hdr);
>>>> 
>>>> -	/* set up reply kvec. Subtract notification bitmap max size (2)
>>>> -	 * so that notification bitmap is put in xdr_buf tail */
>>>> +	/* set up reply kvec. device_addr4 opaque data is read into the
>>>> +	 * pages */
>>>> 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
>>>> -				args->pdev->pglen, hdr.replen - 2);
>>>> +				args->pdev->pglen, replen + 2);
>>>> 	encode_nops(&hdr);
>>>> }
>>>> 
>>>> @@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
>>>> 	 * and places the remaining xdr data in xdr_buf->tail
>>>> 	 */
>>>> 	pdev->mincount = be32_to_cpup(p);
>>>> -	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
>>>> +	/* Calculate padding */
>>>> +	len = xdr_align_size(pdev->mincount);
>>>> +	if (xdr_read_pages(xdr, len) != len)
>>>> 		return -EIO;
>>>> 
>>>> 	/* Parse notification bitmap, verifying that it is zero. */
>>>> --
>>>> 2.28.0
>>>> 
>>>> 
>>>> 
>>>> --
>>>> Trond Myklebust
>>>> Linux NFS client maintainer, Hammerspace
> > > > trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels
  2020-12-01 10:59                   ` Mkrtchyan, Tigran
@ 2020-12-01 14:44                     ` Mkrtchyan, Tigran
  0 siblings, 0 replies; 23+ messages in thread
From: Mkrtchyan, Tigran @ 2020-12-01 14:44 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs, Anna Schumaker



Actually, this was a co-incidence, which means that buf points to
more or less a random place.

Tigran.

----- Original Message -----
> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> To: "trondmy" <trondmy@hammerspace.com>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker" <anna.schumaker@netapp.com>
> Sent: Tuesday, 1 December, 2020 11:59:22
> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels

> More investigation...
> 
> After xdr_read_pages call the xdr stream points to the
> beginning of RPC package and return the xid value on
> the next read (which should be the size of the notification bitmap).
> 
> 
> Regards,
>   Tigran.
> 
> ----- Original Message -----
>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>> To: "trondmy" <trondmy@hammerspace.com>
>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker"
>> <anna.schumaker@netapp.com>
>> Sent: Thursday, 26 November, 2020 18:17:41
>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>> channels
> 
>> I have added some debug info Ind got this:
>> 
>> decode_getdeviceinfo: layout type 4
>> decode_getdeviceinfo: layout size 64
>> decode_getdeviceinfo: layout notification bitmap size 1094994757
>> 
>> So it looks like that xdr_read_pages set xdr pointer to a wrong position
>> and next read, which is bitmap size
>> 
>> 5857         pdev->mincount = be32_to_cpup(p);
>> 5858         dprintk("%s: layout size %u\n", __func__, pdev->mincount);
>> 5859         if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
>> 5860                 return -EIO;
>> 5861
>> 5862         /* Parse notification bitmap, verifying that it is zero. */
>> 5863         p = xdr_inline_decode(xdr, 4);
>> 5864         if (unlikely(!p))
>> 5865                 return -EIO;
>> 5866         len = be32_to_cpup(p);
>> 5867         dprintk("%s: layout notification bitmap size %u\n", __func__, len);
>> 5868         if (len) {
>> 5869                 uint32_t i;
>> 
>> 
>> Tigran.
>> 
>> 
>> ----- Original Message -----
>>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>> To: "trondmy" <trondmy@hammerspace.com>
>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>, "Anna Schumaker"
>>> <anna.schumaker@netapp.com>
>>> Sent: Tuesday, 17 November, 2020 15:50:57
>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>> channels
>> 
>>> Here is the result:
>>> 
>>> 
>>> $ git bisect bad
>>> c567552612ece787b178e3b147b5854ad422a836 is the first bad commit
>>> commit c567552612ece787b178e3b147b5854ad422a836
>>> Author: Anna Schumaker <Anna.Schumaker@Netapp.com>
>>> Date:   Wed May 28 13:41:22 2014 -0400
>>> 
>>>    NFS: Add READ_PLUS data segment support
>>>    
>>>    This patch adds client support for decoding a single NFS4_CONTENT_DATA
>>>    segment returned by the server. This is the simplest implementation
>>>    possible, since it does not account for any hole segments in the reply.
>>>    
>>>    Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
>>> 
>>> fs/nfs/nfs42xdr.c         | 141 ++++++++++++++++++++++++++++++++++++++++++++++
>>> fs/nfs/nfs4client.c       |   2 +
>>> fs/nfs/nfs4proc.c         |  43 +++++++++++++-
>>> fs/nfs/nfs4xdr.c          |   1 +
>>> include/linux/nfs4.h      |   2 +-
>>> include/linux/nfs_fs_sb.h |   1 +
>>> include/linux/nfs_xdr.h   |   2 +-
>>> 7 files changed, 187 insertions(+), 5 deletions(-)
>>> 
>>> 
>>> Regards,
>>>   Tigran.
>>> 
>>> 
>>> 
>>> ----- Original Message -----
>>>> From: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>> To: "trondmy" <trondmy@hammerspace.com>
>>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>> Sent: Monday, 16 November, 2020 21:55:50
>>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>>> channels
>>> 
>>>> Hi Trond,
>>>> 
>>>> I am afraid, that the fix didn't work. I bisecting it....
>>>> 
>>>> 
>>>> Tigran.
>>>> 
>>>> 
>>>> ----- Original Message -----
>>>>> From: "trondmy" <trondmy@hammerspace.com>
>>>>> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>>> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>>> Sent: Saturday, 14 November, 2020 15:29:01
>>>>> Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data
>>>>> channels
>>>> 
>>>>> On Sat, 2020-11-14 at 00:46 +0100, Mkrtchyan, Tigran wrote:
>>>>>> 
>>>>>> 
>>>>>> ----- Original Message -----
>>>>>> > From: "trondmy" <trondmy@hammerspace.com>
>>>>>> > To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
>>>>>> > Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
>>>>>> > Sent: Friday, 13 November, 2020 23:45:00
>>>>>> > Subject: Re: [PATCH v3 00/11] Add RDMA support to the pNFS
>>>>>> > file+flexfiles data channels
>>>>>> 
>>>>>> > On Fri, 2020-11-13 at 22:30 +0100, Mkrtchyan, Tigran wrote:
>>>>>> > > 
>>>>>> > > After more testing, it looks like that client doesn't like
>>>>>> > > notification bitmap:
>>>>>> > > 
>>>>>> > > 
>>>>>> > > [31576.789492] --> _nfs4_proc_getdeviceinfo
>>>>>> > > [31576.789503] --> nfs41_call_sync_prepare data->seq_server
>>>>>> > > 000000001d17c43e
>>>>>> > > [31576.789507] --> nfs4_alloc_slot used_slots=0000
>>>>>> > > highest_used=4294967295 max_slots=16
>>>>>> > > [31576.789510] <-- nfs4_alloc_slot used_slots=0001 highest_used=0
>>>>>> > > slotid=0
>>>>>> > > [31576.789527] encode_sequence:
>>>>>> > > sessionid=2910695007:150995712:0:16777216 seqid=92462 slotid=0
>>>>>> > > max_slotid=0 cache_this=0
>>>>>> > > [31576.789991] decode_getdeviceinfo: unsupported notification
>>>>>> > 
>>>>>> > According to this, you appear to be returning a deviceinfo bitmap
>>>>>> > with
>>>>>> > at least one non-zero entry that is not in the first 32-bit word.
>>>>>> > We
>>>>>> > only ask for notifications for NOTIFY_DEVICEID4_CHANGE and
>>>>>> > NOTIFY_DEVICEID4_DELETE, so we only expect bitmap[0] to have non-
>>>>>> > zero
>>>>>> > entries.
>>>>>> 
>>>>>> 
>>>>>> according to packet capture only bitmap[0] has non zero bits set.
>>>>>> This is the reply of compound starting from nfs staus code, tag
>>>>>> length and so on.
>>>>>> 
>>>>>> 
>>>>>> 0000   00 00 00 00 00 00 00 00 00 00 00 02 00 00 00 35
>>>>>> 0010   00 00 00 00 5f ae 7d ad 00 03 00 09 00 00 00 00
>>>>>> 0020   00 00 00 01 00 00 00 4c 00 00 00 00 00 00 00 0f
>>>>>> 0030   00 00 00 0f 00 00 00 00 00 00 00 2f 00 00 00 00
>>>>>> 0040   00 00 00 04 00 00 00 40 00 00 00 01 00 00 00 03
>>>>>> 0050   74 63 70 00 00 00 00 16 31 33 31 2e 31 36 39 2e
>>>>>> 0060   31 39 31 2e 31 34 33 2e 31 32 35 2e 34 39 00 00
>>>>>> 0070   00 00 00 01 00 00 00 04 00 00 00 01 00 10 00 00
>>>>>> 0080   00 10 00 00 00 00 00 01 00 00 00 02 00 00 00 06
>>>>>> 0090   00 00 00 00
>>>>>> 
>>>>>> 
>>>>>> the last 12 bytes : bitmap size, bitmap[0], bitmap[1]
>>>>>> 
>>>>>> 
>>>>>> This part of code in the didn't change since 2010, and I
>>>>>> have no issues to use 5.8 kernel. I am pretty sure, that
>>>>>> tests with 5.9 did pass as expected. I will try to bisec it.
>>>>> 
>>>>> I don't think I've introduced this bug. I did not touch anything in the
>>>>> getdeviceinfo proc or XDR code.
>>>>> Does the following patch help?
>>>>> 
>>>>> 8<-------------------------------------------------------
>>>>> From e92b2d4e39e91d379ec1147115820ab5dfe4c89a Mon Sep 17 00:00:00 2001
>>>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>>> Date: Fri, 13 Nov 2020 21:42:16 -0500
>>>>> Subject: [PATCH] NFSv4: Fix the alignment of page data in the getdeviceinfo
>>>>> reply
>>>>> 
>>>>> We can fit the device_addr4 opaque data padding in the pages.
>>>>> 
>>>>> Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()")
>>>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>>> ---
>>>>> fs/nfs/nfs4xdr.c | 14 ++++++++++----
>>>>> 1 file changed, 10 insertions(+), 4 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>> index c6dbfcae7517..c8714381d511 100644
>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>> @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst
>>>>> *req,
>>>>> 	struct compound_hdr hdr = {
>>>>> 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
>>>>> 	};
>>>>> +	uint32_t replen;
>>>>> 
>>>>> 	encode_compound_hdr(xdr, req, &hdr);
>>>>> 	encode_sequence(xdr, &args->seq_args, &hdr);
>>>>> +
>>>>> +	replen = hdr.replen + op_decode_hdr_maxsz;
>>>>> +
>>>>> 	encode_getdeviceinfo(xdr, args, &hdr);
>>>>> 
>>>>> -	/* set up reply kvec. Subtract notification bitmap max size (2)
>>>>> -	 * so that notification bitmap is put in xdr_buf tail */
>>>>> +	/* set up reply kvec. device_addr4 opaque data is read into the
>>>>> +	 * pages */
>>>>> 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
>>>>> -				args->pdev->pglen, hdr.replen - 2);
>>>>> +				args->pdev->pglen, replen + 2);
>>>>> 	encode_nops(&hdr);
>>>>> }
>>>>> 
>>>>> @@ -5848,7 +5852,9 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
>>>>> 	 * and places the remaining xdr data in xdr_buf->tail
>>>>> 	 */
>>>>> 	pdev->mincount = be32_to_cpup(p);
>>>>> -	if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
>>>>> +	/* Calculate padding */
>>>>> +	len = xdr_align_size(pdev->mincount);
>>>>> +	if (xdr_read_pages(xdr, len) != len)
>>>>> 		return -EIO;
>>>>> 
>>>>> 	/* Parse notification bitmap, verifying that it is zero. */
>>>>> --
>>>>> 2.28.0
>>>>> 
>>>>> 
>>>>> 
>>>>> --
>>>>> Trond Myklebust
>>>>> Linux NFS client maintainer, Hammerspace
> > > > > trond.myklebust@hammerspace.com

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-12-01 14:45 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-10 23:18 [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels trondmy
2020-11-10 23:18 ` [PATCH v3 01/11] SUNRPC: xprt_load_transport() needs to support the netid "rdma6" trondmy
2020-11-10 23:18   ` [PATCH v3 02/11] SUNRPC: Close a race with transport setup and module put trondmy
2020-11-10 23:18     ` [PATCH v3 03/11] SUNRPC: Add a helper to return the transport identifier given a netid trondmy
2020-11-10 23:18       ` [PATCH v3 04/11] NFS: Switch mount code to use xprt_find_transport_ident() trondmy
2020-11-10 23:19         ` [PATCH v3 05/11] SUNRPC: Remove unused function xprt_load_transport() trondmy
2020-11-10 23:19           ` [PATCH v3 06/11] NFSv4/pNFS: Use connections to a DS that are all of the same protocol family trondmy
2020-11-10 23:19             ` [PATCH v3 07/11] pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr trondmy
2020-11-10 23:19               ` [PATCH v3 08/11] NFSv4/pNFS: Store the transport type in " trondmy
2020-11-10 23:19                 ` [PATCH v3 09/11] pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports trondmy
2020-11-10 23:19                   ` [PATCH v3 10/11] SUNRPC: Fix up open coded kmemdup_nul() trondmy
2020-11-10 23:19                     ` [PATCH v3 11/11] pNFS: Clean up open coded xdr string decoding trondmy
2020-11-10 23:42 ` [PATCH v3 00/11] Add RDMA support to the pNFS file+flexfiles data channels Trond Myklebust
2020-11-13 12:48   ` Mkrtchyan, Tigran
2020-11-13 21:30     ` Mkrtchyan, Tigran
2020-11-13 22:45       ` Trond Myklebust
2020-11-13 23:46         ` Mkrtchyan, Tigran
2020-11-14 14:29           ` Trond Myklebust
2020-11-16 20:55             ` Mkrtchyan, Tigran
2020-11-17 14:50               ` Mkrtchyan, Tigran
2020-11-26 17:17                 ` Mkrtchyan, Tigran
2020-12-01 10:59                   ` Mkrtchyan, Tigran
2020-12-01 14:44                     ` Mkrtchyan, Tigran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).