Linux-NFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/9] Client container fixes
@ 2019-04-24 21:46 Trond Myklebust
  2019-04-24 21:46 ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

This patch set aims to fix up the NFS client when invoked from inside a
container. It aims to ensure that upcalls use the correct user namespace
when talking to the rpc.gssd and rpc.idmapd daemons.

Trond Myklebust (9):
  SUNRPC: Cache cred of process creating the rpc_client
  NFS: Store the credential of the mount process in the nfs_server
  SUNRPC: Use the client user namespace when encoding creds
  SUNRPC: Use namespace of listening daemon in the client AUTH_GSS
    upcall
  NFS: Convert NFSv3 to use the container user namespace
  NFSv4: Convert the NFS client idmapper to use the container user
    namespace
  NFS: Convert NFSv2 to use the container user namespace
  NFS: When mounting, don't share filesystems between different user
    namespaces
  lockd: Store the lockd client credential in struct nlm_host

 fs/lockd/clntlock.c            |   2 +-
 fs/lockd/host.c                |  10 ++-
 fs/lockd/mon.c                 |   1 +
 fs/nfs/client.c                |   7 ++
 fs/nfs/internal.h              |   1 +
 fs/nfs/mount_clnt.c            |   2 +
 fs/nfs/nfs2xdr.c               |  58 +++++++++-----
 fs/nfs/nfs3client.c            |   1 +
 fs/nfs/nfs3xdr.c               | 142 ++++++++++++++++++++-------------
 fs/nfs/nfs4client.c            |   6 ++
 fs/nfs/nfs4idmap.c             |  27 +++++--
 fs/nfs/super.c                 |  17 ++++
 fs/nfsd/nfs4callback.c         |   1 +
 include/linux/lockd/bind.h     |   1 +
 include/linux/lockd/lockd.h    |   4 +-
 include/linux/nfs_fs_sb.h      |   3 +
 include/linux/sunrpc/clnt.h    |   2 +
 net/sunrpc/auth_gss/auth_gss.c |  63 +++++++++++----
 net/sunrpc/auth_unix.c         |   9 ++-
 net/sunrpc/clnt.c              |   7 ++
 net/sunrpc/rpcb_clnt.c         |   9 ++-
 21 files changed, 267 insertions(+), 106 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client
  2019-04-24 21:46 [PATCH 0/9] Client container fixes Trond Myklebust
@ 2019-04-24 21:46 ` Trond Myklebust
  2019-04-24 21:46   ` [PATCH 2/9] NFS: Store the credential of the mount process in the nfs_server Trond Myklebust
  2019-06-14 18:52   ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Ido Schimmel
  0 siblings, 2 replies; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When converting kuids to AUTH_UNIX creds, etc we will want to use the
same user namespace as the process that created the rpc client.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/lockd/host.c             | 1 +
 fs/lockd/mon.c              | 1 +
 fs/nfs/client.c             | 1 +
 fs/nfs/mount_clnt.c         | 2 ++
 fs/nfsd/nfs4callback.c      | 1 +
 include/linux/sunrpc/clnt.h | 2 ++
 net/sunrpc/clnt.c           | 7 +++++++
 net/sunrpc/rpcb_clnt.c      | 9 +++++++--
 8 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f0b5c987d6ae..d46081123f7c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -458,6 +458,7 @@ nlm_bind_host(struct nlm_host *host)
 			.authflavor	= RPC_AUTH_UNIX,
 			.flags		= (RPC_CLNT_CREATE_NOPING |
 					   RPC_CLNT_CREATE_AUTOBIND),
+			.cred		= current_cred(),
 		};
 
 		/*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 654594ef4f94..1eabd91870e6 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
 		.version		= NSM_VERSION,
 		.authflavor		= RPC_AUTH_NULL,
 		.flags			= RPC_CLNT_CREATE_NOPING,
+		.cred			= current_cred(),
 	};
 
 	return rpc_create(&args);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 90d71fda65ce..3ce44d5088a2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -500,6 +500,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
 		.authflavor	= flavor,
+		.cred		= current_cred(),
 	};
 
 	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d979ff4fee7e..cb7c10e9721e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info)
 		.program	= &mnt_program,
 		.version	= info->version,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= current_cred(),
 	};
 	struct rpc_clnt		*mnt_clnt;
 	int			status;
@@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info)
 		.version	= info->version,
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= RPC_CLNT_CREATE_NOPING,
+		.cred		= current_cred(),
 	};
 	struct rpc_message msg	= {
 		.rpc_argp	= info->dirpath,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d219159b98af..70f1cf9c76b4 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		.program	= &cb_program,
 		.version	= 1,
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+		.cred		= current_cred(),
 	};
 	struct rpc_clnt *client;
 	const struct cred *cred;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 98bc9883b230..7cf616fac9f6 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -71,6 +71,7 @@ struct rpc_clnt {
 	struct dentry		*cl_debugfs;	/* debugfs directory */
 #endif
 	struct rpc_xprt_iter	cl_xpi;
+	const struct cred	*cl_cred;
 };
 
 /*
@@ -125,6 +126,7 @@ struct rpc_create_args {
 	unsigned long		flags;
 	char			*client_name;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
+	const struct cred	*cred;
 };
 
 struct rpc_add_xprt_test {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ff11dc98d7f..3d062db7baa1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	if (err)
 		goto out_no_clid;
 
+	clnt->cl_cred	  = get_cred(args->cred);
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_prog     = args->prognumber ? : program->number;
@@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 out_no_path:
 	rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
+	put_cred(clnt->cl_cred);
 	rpc_free_clid(clnt);
 out_no_clid:
 	kfree(clnt);
@@ -627,6 +629,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
 	new->cl_discrtry = clnt->cl_discrtry;
 	new->cl_chatty = clnt->cl_chatty;
 	new->cl_principal = clnt->cl_principal;
+	new->cl_cred = get_cred(clnt->cl_cred);
 	return new;
 
 out_err:
@@ -648,6 +651,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= clnt->cl_auth->au_flavor,
+		.cred		= clnt->cl_cred,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -669,6 +673,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= flavor,
+		.cred		= clnt->cl_cred,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -882,6 +887,7 @@ rpc_free_client(struct rpc_clnt *clnt)
 	xprt_put(rcu_dereference_raw(clnt->cl_xprt));
 	xprt_iter_destroy(&clnt->cl_xpi);
 	rpciod_down();
+	put_cred(clnt->cl_cred);
 	rpc_free_clid(clnt);
 	kfree(clnt);
 	return parent;
@@ -946,6 +952,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 		.prognumber	= program->number,
 		.version	= vers,
 		.authflavor	= old->cl_auth->au_flavor,
+		.cred		= old->cl_cred,
 	};
 	struct rpc_clnt *clnt;
 	int err;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 41a971ac1c63..5107fedb40f2 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net)
 		.program	= &rpcb_program,
 		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_NULL,
+		.cred		= current_cred(),
 		/*
 		 * We turn off the idle timeout to prevent the kernel
 		 * from automatically disconnecting the socket.
@@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net)
 		.program	= &rpcb_program,
 		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= current_cred(),
 		.flags		= RPC_CLNT_CREATE_NOPING,
 	};
 	struct rpc_clnt *clnt, *clnt4;
@@ -358,7 +360,8 @@ int rpcb_create_local(struct net *net)
 static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
 				    const char *hostname,
 				    struct sockaddr *srvaddr, size_t salen,
-				    int proto, u32 version)
+				    int proto, u32 version,
+				    const struct cred *cred)
 {
 	struct rpc_create_args args = {
 		.net		= net,
@@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
 		.program	= &rpcb_program,
 		.version	= version,
 		.authflavor	= RPC_AUTH_UNIX,
+		.cred		= cred,
 		.flags		= (RPC_CLNT_CREATE_NOPING |
 					RPC_CLNT_CREATE_NONPRIVPORT),
 	};
@@ -744,7 +748,8 @@ void rpcb_getport_async(struct rpc_task *task)
 	rpcb_clnt = rpcb_create(xprt->xprt_net,
 				clnt->cl_nodename,
 				xprt->servername, sap, salen,
-				xprt->prot, bind_version);
+				xprt->prot, bind_version,
+				clnt->cl_cred);
 	if (IS_ERR(rpcb_clnt)) {
 		status = PTR_ERR(rpcb_clnt);
 		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 2/9] NFS: Store the credential of the mount process in the nfs_server
  2019-04-24 21:46 ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Trond Myklebust
@ 2019-04-24 21:46   ` Trond Myklebust
  2019-04-24 21:46     ` [PATCH 3/9] SUNRPC: Use the client user namespace when encoding creds Trond Myklebust
  2019-06-14 18:52   ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Ido Schimmel
  1 sibling, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

Store the credential of the mount process so that we can determine
information such as the user namespace.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/client.c           | 8 +++++++-
 fs/nfs/internal.h         | 1 +
 fs/nfs/nfs3client.c       | 1 +
 fs/nfs/nfs4client.c       | 6 ++++++
 include/linux/nfs_fs_sb.h | 3 +++
 5 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3ce44d5088a2..d1c003dd7e43 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -500,7 +500,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
 		.authflavor	= flavor,
-		.cred		= current_cred(),
+		.cred		= cl_init->cred,
 	};
 
 	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -653,6 +653,7 @@ static int nfs_init_server(struct nfs_server *server,
 		.proto = data->nfs_server.protocol,
 		.net = data->net,
 		.timeparms = &timeparms,
+		.cred = server->cred,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -921,6 +922,7 @@ void nfs_free_server(struct nfs_server *server)
 	ida_destroy(&server->lockowner_id);
 	ida_destroy(&server->openowner_id);
 	nfs_free_iostats(server->io_stats);
+	put_cred(server->cred);
 	kfree(server);
 	nfs_release_automount_timer();
 }
@@ -941,6 +943,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	server->cred = get_cred(current_cred());
+
 	error = -ENOMEM;
 	fattr = nfs_alloc_fattr();
 	if (fattr == NULL)
@@ -1007,6 +1011,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	server->cred = get_cred(source->cred);
+
 	error = -ENOMEM;
 	fattr_fsinfo = nfs_alloc_fattr();
 	if (fattr_fsinfo == NULL)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c7cf23ae6597..22af82091421 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -84,6 +84,7 @@ struct nfs_client_initdata {
 	u32 minorversion;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
+	const struct cred *cred;
 };
 
 /*
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 7879f2a0fcfd..1afdb0f7473f 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
 		.proto = ds_proto,
 		.net = mds_clp->cl_net,
 		.timeparms = &ds_timeout,
+		.cred = mds_srv->cred,
 	};
 	struct nfs_client *clp;
 	char buf[INET6_ADDRSTRLEN + 1];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 1339ede979af..3ce246346f02 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		.minorversion = minorversion,
 		.net = net,
 		.timeparms = timeparms,
+		.cred = server->cred,
 	};
 	struct nfs_client *clp;
 
@@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		.minorversion = minor_version,
 		.net = mds_clp->cl_net,
 		.timeparms = &ds_timeout,
+		.cred = mds_srv->cred,
 	};
 	char buf[INET6_ADDRSTRLEN + 1];
 
@@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	server->cred = get_cred(current_cred());
+
 	auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
 
 	/* set up the general RPC client */
@@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	parent_server = NFS_SB(data->sb);
 	parent_client = parent_server->nfs_client;
 
+	server->cred = get_cred(parent_server->cred);
+
 	/* Initialise the client representation from the parent server */
 	nfs_server_copy_userdata(server, parent_server);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index c827d31298cc..add00f12a340 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -231,6 +231,9 @@ struct nfs_server {
 
 	/* XDR related information */
 	unsigned int		read_hdrsize;
+
+	/* User namespace info */
+	const struct cred	*cred;
 };
 
 /* Server capabilities */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 3/9] SUNRPC: Use the client user namespace when encoding creds
  2019-04-24 21:46   ` [PATCH 2/9] NFS: Store the credential of the mount process in the nfs_server Trond Myklebust
@ 2019-04-24 21:46     ` Trond Myklebust
  2019-04-24 21:46       ` [PATCH 4/9] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When encoding AUTH_UNIX creds and AUTH_GSS upcalls, use the user namespace
of the process that created the rpc client.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 11 ++++++++---
 net/sunrpc/auth_unix.c         |  9 +++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3fd56c0c90ae..e3601dc6c180 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -412,7 +412,10 @@ gss_upcall_callback(struct rpc_task *task)
 
 static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
 {
-	uid_t uid = from_kuid(&init_user_ns, gss_msg->uid);
+	struct user_namespace *userns = gss_msg->auth->client->cl_cred ?
+		gss_msg->auth->client->cl_cred->user_ns : &init_user_ns;
+
+	uid_t uid = from_kuid_munged(userns, gss_msg->uid);
 	memcpy(gss_msg->databuf, &uid, sizeof(uid));
 	gss_msg->msg.data = gss_msg->databuf;
 	gss_msg->msg.len = sizeof(uid);
@@ -424,13 +427,15 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 				const char *service_name,
 				const char *target_name)
 {
+	struct user_namespace *userns = gss_msg->auth->client->cl_cred ?
+		gss_msg->auth->client->cl_cred->user_ns : &init_user_ns;
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
 	size_t buflen = sizeof(gss_msg->databuf);
 	int len;
 
 	len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
-			from_kuid(&init_user_ns, gss_msg->uid));
+			from_kuid_munged(userns, gss_msg->uid));
 	buflen -= len;
 	p += len;
 	gss_msg->msg.len = len;
@@ -707,7 +712,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 		goto err;
 	}
 
-	uid = make_kuid(&init_user_ns, id);
+	uid = make_kuid(current_user_ns(), id);
 	if (!uid_valid(uid)) {
 		err = -EINVAL;
 		goto err;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index d4018e5a24c5..e7df1f782b2e 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 	__be32		*p, *cred_len, *gidarr_len;
 	int		i;
 	struct group_info *gi = cred->cr_cred->group_info;
+	struct user_namespace *userns = clnt->cl_cred ?
+		clnt->cl_cred->user_ns : &init_user_ns;
 
 	/* Credential */
 
@@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 	p = xdr_reserve_space(xdr, 3 * sizeof(*p));
 	if (!p)
 		goto marshal_failed;
-	*p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
-	*p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+	*p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
+	*p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
 
 	gidarr_len = p++;
 	if (gi)
 		for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
-			*p++ = cpu_to_be32(from_kgid(&init_user_ns,
-						     gi->gid[i]));
+			*p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
 	*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
 	*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
 	p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 4/9] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
  2019-04-24 21:46     ` [PATCH 3/9] SUNRPC: Use the client user namespace when encoding creds Trond Myklebust
@ 2019-04-24 21:46       ` Trond Myklebust
  2019-04-24 21:46         ` [PATCH 5/9] NFS: Convert NFSv3 to use the container user namespace Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When the client needs to talk to rpc.gssd, we should ensure that the
uid argument is encoded to match the user namespace of the daemon.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 60 +++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index e3601dc6c180..6baa8fd77c29 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -269,6 +269,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 struct gss_upcall_msg {
 	refcount_t count;
 	kuid_t	uid;
+	const char *service_name;
 	struct rpc_pipe_msg msg;
 	struct list_head list;
 	struct gss_auth *auth;
@@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 		gss_put_ctx(gss_msg->ctx);
 	rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
 	gss_put_auth(gss_msg->auth);
+	kfree_const(gss_msg->service_name);
 	kfree(gss_msg);
 }
 
@@ -410,10 +412,10 @@ gss_upcall_callback(struct rpc_task *task)
 	gss_release_msg(gss_msg);
 }
 
-static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
+static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
+			      const struct cred *cred)
 {
-	struct user_namespace *userns = gss_msg->auth->client->cl_cred ?
-		gss_msg->auth->client->cl_cred->user_ns : &init_user_ns;
+	struct user_namespace *userns = cred->user_ns;
 
 	uid_t uid = from_kuid_munged(userns, gss_msg->uid);
 	memcpy(gss_msg->databuf, &uid, sizeof(uid));
@@ -423,12 +425,24 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
 	BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
 }
 
+static ssize_t
+gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
+		char __user *buf, size_t buflen)
+{
+	struct gss_upcall_msg *gss_msg = container_of(msg,
+						      struct gss_upcall_msg,
+						      msg);
+	if (msg->copied == 0)
+		gss_encode_v0_msg(gss_msg, file->f_cred);
+	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 				const char *service_name,
-				const char *target_name)
+				const char *target_name,
+				const struct cred *cred)
 {
-	struct user_namespace *userns = gss_msg->auth->client->cl_cred ?
-		gss_msg->auth->client->cl_cred->user_ns : &init_user_ns;
+	struct user_namespace *userns = cred->user_ns;
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
 	size_t buflen = sizeof(gss_msg->databuf);
@@ -496,6 +510,25 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 	return -ENOMEM;
 }
 
+static ssize_t
+gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
+		char __user *buf, size_t buflen)
+{
+	struct gss_upcall_msg *gss_msg = container_of(msg,
+						      struct gss_upcall_msg,
+						      msg);
+	int err;
+	if (msg->copied == 0) {
+		err = gss_encode_v1_msg(gss_msg,
+					gss_msg->service_name,
+					gss_msg->auth->target_name,
+					file->f_cred);
+		if (err)
+			return err;
+	}
+	return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static struct gss_upcall_msg *
 gss_alloc_msg(struct gss_auth *gss_auth,
 		kuid_t uid, const char *service_name)
@@ -518,16 +551,11 @@ gss_alloc_msg(struct gss_auth *gss_auth,
 	refcount_set(&gss_msg->count, 1);
 	gss_msg->uid = uid;
 	gss_msg->auth = gss_auth;
-	switch (vers) {
-	case 0:
-		gss_encode_v0_msg(gss_msg);
-		break;
-	default:
-		err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
-		if (err)
+	if (service_name) {
+		gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+		if (!gss_msg->service_name)
 			goto err_put_pipe_version;
 	}
-	kref_get(&gss_auth->kref);
 	return gss_msg;
 err_put_pipe_version:
 	put_pipe_version(gss_auth->net);
@@ -2121,7 +2149,7 @@ static const struct rpc_credops gss_nullops = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
-	.upcall		= rpc_pipe_generic_upcall,
+	.upcall		= gss_v0_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v0,
@@ -2129,7 +2157,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
-	.upcall		= rpc_pipe_generic_upcall,
+	.upcall		= gss_v1_upcall,
 	.downcall	= gss_pipe_downcall,
 	.destroy_msg	= gss_pipe_destroy_msg,
 	.open_pipe	= gss_pipe_open_v1,
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 5/9] NFS: Convert NFSv3 to use the container user namespace
  2019-04-24 21:46       ` [PATCH 4/9] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall Trond Myklebust
@ 2019-04-24 21:46         ` Trond Myklebust
  2019-04-24 21:46           ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper " Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When mapping NFS identities, we want to substitute for the uids and
gids on the wire as we would for the AUTH_UNIX creds.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs3xdr.c | 142 ++++++++++++++++++++++++++++-------------------
 1 file changed, 86 insertions(+), 56 deletions(-)

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 110358f4986d..abbbdde97e31 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -104,6 +104,20 @@ static const umode_t nfs_type2fmt[] = {
 	[NF3FIFO] = S_IFIFO,
 };
 
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+	if (clnt && clnt->cl_cred)
+		return clnt->cl_cred->user_ns;
+	return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+	if (rqstp->rq_task)
+		return rpc_userns(rqstp->rq_task->tk_client);
+	return &init_user_ns;
+}
+
 /*
  * Encode/decode NFSv3 basic data types
  *
@@ -516,7 +530,8 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
  *		set_mtime	mtime;
  *	};
  */
-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
+		struct user_namespace *userns)
 {
 	struct timespec ts;
 	u32 nbytes;
@@ -551,13 +566,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
 
 	if (attr->ia_valid & ATTR_UID) {
 		*p++ = xdr_one;
-		*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+		*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
 	} else
 		*p++ = xdr_zero;
 
 	if (attr->ia_valid & ATTR_GID) {
 		*p++ = xdr_one;
-		*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+		*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
 	} else
 		*p++ = xdr_zero;
 
@@ -606,7 +621,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
  *		nfstime3	ctime;
  *	};
  */
-static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	umode_t fmode;
 	__be32 *p;
@@ -619,10 +635,10 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
 	fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
 	fattr->nlink = be32_to_cpup(p++);
-	fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+	fattr->uid = make_kuid(userns, be32_to_cpup(p++));
 	if (!uid_valid(fattr->uid))
 		goto out_uid;
-	fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+	fattr->gid = make_kgid(userns, be32_to_cpup(p++));
 	if (!gid_valid(fattr->gid))
 		goto out_gid;
 
@@ -659,7 +675,8 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
  *		void;
  *	};
  */
-static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	__be32 *p;
 
@@ -667,7 +684,7 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	if (unlikely(!p))
 		return -EIO;
 	if (*p != xdr_zero)
-		return decode_fattr3(xdr, fattr);
+		return decode_fattr3(xdr, fattr, userns);
 	return 0;
 }
 
@@ -728,14 +745,15 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	return 0;
 }
 
-static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_pre_op_attr(xdr, fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, fattr);
+	error = decode_post_op_attr(xdr, fattr, userns);
 out:
 	return error;
 }
@@ -837,7 +855,7 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 {
 	const struct nfs3_sattrargs *args = data;
 	encode_nfs_fh3(xdr, args->fh);
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
 	encode_sattrguard3(xdr, args);
 }
 
@@ -998,13 +1016,14 @@ static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_createhow3(struct xdr_stream *xdr,
-			      const struct nfs3_createargs *args)
+			      const struct nfs3_createargs *args,
+			      struct user_namespace *userns)
 {
 	encode_uint32(xdr, args->createmode);
 	switch (args->createmode) {
 	case NFS3_CREATE_UNCHECKED:
 	case NFS3_CREATE_GUARDED:
-		encode_sattr3(xdr, args->sattr);
+		encode_sattr3(xdr, args->sattr, userns);
 		break;
 	case NFS3_CREATE_EXCLUSIVE:
 		encode_createverf3(xdr, args->verifier);
@@ -1021,7 +1040,7 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
 	const struct nfs3_createargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_createhow3(xdr, args);
+	encode_createhow3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1039,7 +1058,7 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
 	const struct nfs3_mkdirargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -1056,11 +1075,12 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-				const void *data)
+				const void *data,
+				struct user_namespace *userns)
 {
 	const struct nfs3_symlinkargs *args = data;
 
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, userns);
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
@@ -1071,7 +1091,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
 	const struct nfs3_symlinkargs *args = data;
 
 	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_symlinkdata3(xdr, args);
+	encode_symlinkdata3(xdr, args, rpc_rqst_userns(req));
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
@@ -1100,24 +1120,26 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_devicedata3(struct xdr_stream *xdr,
-			       const struct nfs3_mknodargs *args)
+			       const struct nfs3_mknodargs *args,
+			       struct user_namespace *userns)
 {
-	encode_sattr3(xdr, args->sattr);
+	encode_sattr3(xdr, args->sattr, userns);
 	encode_specdata3(xdr, args->rdev);
 }
 
 static void encode_mknoddata3(struct xdr_stream *xdr,
-			      const struct nfs3_mknodargs *args)
+			      const struct nfs3_mknodargs *args,
+			      struct user_namespace *userns)
 {
 	encode_ftype3(xdr, args->type);
 	switch (args->type) {
 	case NF3CHR:
 	case NF3BLK:
-		encode_devicedata3(xdr, args);
+		encode_devicedata3(xdr, args, userns);
 		break;
 	case NF3SOCK:
 	case NF3FIFO:
-		encode_sattr3(xdr, args->sattr);
+		encode_sattr3(xdr, args->sattr, userns);
 		break;
 	case NF3REG:
 	case NF3DIR:
@@ -1134,7 +1156,7 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
 	const struct nfs3_mknodargs *args = data;
 
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
-	encode_mknoddata3(xdr, args);
+	encode_mknoddata3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1379,7 +1401,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_fattr3(xdr, result);
+	error = decode_fattr3(xdr, result, rpc_rqst_userns(req));
 out:
 	return error;
 out_default:
@@ -1414,7 +1436,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result);
+	error = decode_wcc_data(xdr, result, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1449,6 +1471,7 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
 				   void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1461,14 +1484,14 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 	error = decode_nfs_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, userns);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	return nfs3_stat_to_errno(status);
@@ -1504,7 +1527,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1545,7 +1568,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result);
+	error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1623,7 +1646,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	result->op_status = status;
@@ -1694,7 +1717,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	result->op_status = status;
@@ -1728,14 +1751,15 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 static int decode_create3resok(struct xdr_stream *xdr,
-			       struct nfs3_diropres *result)
+			       struct nfs3_diropres *result,
+			       struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_post_op_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
 	/* The server isn't required to return a file handle.
@@ -1744,7 +1768,7 @@ static int decode_create3resok(struct xdr_stream *xdr,
 	 * values for the new object. */
 	if (result->fh->size == 0)
 		result->fattr->valid = 0;
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, userns);
 out:
 	return error;
 }
@@ -1753,6 +1777,7 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
 				   void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1762,11 +1787,11 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_create3resok(xdr, result);
+	error = decode_create3resok(xdr, result, userns);
 out:
 	return error;
 out_default:
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	return nfs3_stat_to_errno(status);
@@ -1801,7 +1826,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1836,6 +1861,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
 				   void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs_renameres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1843,10 +1869,10 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->old_fattr);
+	error = decode_wcc_data(xdr, result->old_fattr, userns);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->new_fattr);
+	error = decode_wcc_data(xdr, result->new_fattr, userns);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1880,6 +1906,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *data)
 {
+	struct user_namespace *userns = rpc_rqst_userns(req);
 	struct nfs3_linkres *result = data;
 	enum nfs_stat status;
 	int error;
@@ -1887,10 +1914,10 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1939,6 +1966,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		       bool plus)
 {
+	struct user_namespace *userns = rpc_userns(entry->server->client);
 	struct nfs_entry old = *entry;
 	__be32 *p;
 	int error;
@@ -1973,7 +2001,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
 	if (plus) {
 		entry->fattr->valid = 0;
-		error = decode_post_op_attr(xdr, entry->fattr);
+		error = decode_post_op_attr(xdr, entry->fattr, userns);
 		if (unlikely(error))
 			return error;
 		if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
@@ -2045,11 +2073,12 @@ static int decode_dirlist3(struct xdr_stream *xdr)
 }
 
 static int decode_readdir3resok(struct xdr_stream *xdr,
-				struct nfs3_readdirres *result)
+				struct nfs3_readdirres *result,
+				struct user_namespace *userns)
 {
 	int error;
 
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, userns);
 	if (unlikely(error))
 		goto out;
 	/* XXX: do we need to check if result->verf != NULL ? */
@@ -2074,11 +2103,11 @@ static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_readdir3resok(xdr, result);
+	error = decode_readdir3resok(xdr, result, rpc_rqst_userns(req));
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	return nfs3_stat_to_errno(status);
@@ -2138,7 +2167,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -2212,7 +2241,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -2273,7 +2302,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -2315,7 +2344,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	result->op_status = status;
@@ -2331,14 +2360,15 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 #ifdef CONFIG_NFS_V3_ACL
 
 static inline int decode_getacl3resok(struct xdr_stream *xdr,
-				      struct nfs3_getaclres *result)
+				      struct nfs3_getaclres *result,
+				      struct user_namespace *userns)
 {
 	struct posix_acl **acl;
 	unsigned int *aclcnt;
 	size_t hdrlen;
 	int error;
 
-	error = decode_post_op_attr(xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr, userns);
 	if (unlikely(error))
 		goto out;
 	error = decode_uint32(xdr, &result->mask);
@@ -2386,7 +2416,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_getacl3resok(xdr, result);
+	error = decode_getacl3resok(xdr, result, rpc_rqst_userns(req));
 out:
 	return error;
 out_default:
@@ -2405,7 +2435,7 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_post_op_attr(xdr, result);
+	error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
 out:
 	return error;
 out_default:
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-24 21:46         ` [PATCH 5/9] NFS: Convert NFSv3 to use the container user namespace Trond Myklebust
@ 2019-04-24 21:46           ` " Trond Myklebust
  2019-04-24 21:46             ` [PATCH 7/9] NFS: Convert NFSv2 " Trond Myklebust
  2019-04-25 14:32             ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace bfields
  0 siblings, 2 replies; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When mapping NFS identities using the NFSv4 idmapper, we want to substitute
for the uids and gids that would normally go on the wire as part of a
NFSv3 request. So we use the same mapping in the NFSv4 upcall as we
use in the NFSv3 RPC call (i.e. the mapping stored in the rpc_clnt cred).

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4idmap.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index bf34ddaa2ad7..4884fdae28fb 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -69,8 +69,16 @@ struct idmap {
 	struct rpc_pipe		*idmap_pipe;
 	struct idmap_legacy_upcalldata *idmap_upcall_data;
 	struct mutex		idmap_mutex;
+	const struct cred	*cred;
 };
 
+static struct user_namespace *idmap_userns(const struct idmap *idmap)
+{
+	if (idmap && idmap->cred)
+		return idmap->cred->user_ns;
+	return &init_user_ns;
+}
+
 /**
  * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
  * @fattr: fully initialised struct nfs_fattr
@@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
 					 const char *type, struct idmap *idmap)
 {
 	char *desc;
-	struct key *rkey;
+	struct key *rkey = ERR_PTR(-EAGAIN);
 	ssize_t ret;
 
 	ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	rkey = request_key(&key_type_id_resolver, desc, "");
+	if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
+		rkey = request_key(&key_type_id_resolver, desc, "");
 	if (IS_ERR(rkey)) {
 		mutex_lock(&idmap->idmap_mutex);
 		rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
@@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
 	if (idmap == NULL)
 		return -ENOMEM;
 
+	mutex_init(&idmap->idmap_mutex);
+	idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
+
 	rpc_init_pipe_dir_object(&idmap->idmap_pdo,
 			&nfs_idmap_pipe_dir_object_ops,
 			idmap);
@@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
 		goto err;
 	}
 	idmap->idmap_pipe = pipe;
-	mutex_init(&idmap->idmap_mutex);
 
 	error = rpc_add_pipe_dir_object(clp->cl_net,
 			&clp->cl_rpcclient->cl_pipedir_objects,
@@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
 err_destroy_pipe:
 	rpc_destroy_pipe_data(idmap->idmap_pipe);
 err:
+	put_cred(idmap->cred);
 	kfree(idmap);
 	return error;
 }
@@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
 			&clp->cl_rpcclient->cl_pipedir_objects,
 			&idmap->idmap_pdo);
 	rpc_destroy_pipe_data(idmap->idmap_pipe);
+	put_cred(idmap->cred);
 	kfree(idmap);
 }
 
@@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
 	if (!nfs_map_string_to_numeric(name, namelen, &id))
 		ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
 	if (ret == 0) {
-		*uid = make_kuid(&init_user_ns, id);
+		*uid = make_kuid(idmap_userns(idmap), id);
 		if (!uid_valid(*uid))
 			ret = -ERANGE;
 	}
@@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
 	if (!nfs_map_string_to_numeric(name, namelen, &id))
 		ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
 	if (ret == 0) {
-		*gid = make_kgid(&init_user_ns, id);
+		*gid = make_kgid(idmap_userns(idmap), id);
 		if (!gid_valid(*gid))
 			ret = -ERANGE;
 	}
@@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
 	int ret = -EINVAL;
 	__u32 id;
 
-	id = from_kuid(&init_user_ns, uid);
+	id = from_kuid_munged(idmap_userns(idmap), uid);
 	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 		ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
 	if (ret < 0)
@@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
 	int ret = -EINVAL;
 	__u32 id;
 
-	id = from_kgid(&init_user_ns, gid);
+	id = from_kgid_munged(idmap_userns(idmap), gid);
 	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 		ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
 	if (ret < 0)
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 7/9] NFS: Convert NFSv2 to use the container user namespace
  2019-04-24 21:46           ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper " Trond Myklebust
@ 2019-04-24 21:46             ` " Trond Myklebust
  2019-04-24 21:46               ` [PATCH 8/9] NFS: When mounting, don't share filesystems between different user namespaces Trond Myklebust
  2019-04-25 14:32             ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace bfields
  1 sibling, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When mapping NFS identities, we want to substitute for the uids and
gids on the wire as we would for the AUTH_UNIX creds.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs2xdr.c | 58 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a7ed29de0a40..572794dab4b1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -76,6 +76,20 @@ static int nfs_stat_to_errno(enum nfs_stat);
  * or decoded inline.
  */
 
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+	if (clnt && clnt->cl_cred)
+		return clnt->cl_cred->user_ns;
+	return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+	if (rqstp->rq_task)
+		return rpc_userns(rqstp->rq_task->tk_client);
+	return &init_user_ns;
+}
+
 /*
  *	typedef opaque	nfsdata<>;
  */
@@ -248,7 +262,8 @@ static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
  *	};
  *
  */
-static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct user_namespace *userns)
 {
 	u32 rdev, type;
 	__be32 *p;
@@ -263,10 +278,10 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
 	fattr->mode = be32_to_cpup(p++);
 	fattr->nlink = be32_to_cpup(p++);
-	fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+	fattr->uid = make_kuid(userns, be32_to_cpup(p++));
 	if (!uid_valid(fattr->uid))
 		goto out_uid;
-	fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+	fattr->gid = make_kgid(userns, be32_to_cpup(p++));
 	if (!gid_valid(fattr->gid))
 		goto out_gid;
 		
@@ -321,7 +336,8 @@ static __be32 *xdr_time_not_set(__be32 *p)
 	return p;
 }
 
-static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
+		struct user_namespace *userns)
 {
 	struct timespec ts;
 	__be32 *p;
@@ -333,11 +349,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_UID)
-		*p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+		*p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_GID)
-		*p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+		*p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
 	else
 		*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
 	if (attr->ia_valid & ATTR_SIZE)
@@ -451,7 +467,8 @@ static int decode_path(struct xdr_stream *xdr)
  *	};
  */
 static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
-			   __u32 *op_status)
+			   __u32 *op_status,
+			   struct user_namespace *userns)
 {
 	enum nfs_stat status;
 	int error;
@@ -463,7 +480,7 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
 		*op_status = status;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(xdr, result);
+	error = decode_fattr(xdr, result, userns);
 out:
 	return error;
 out_default:
@@ -498,19 +515,21 @@ static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
  *		void;
  *	};
  */
-static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result,
+		struct user_namespace *userns)
 {
 	int error;
 
 	error = decode_fhandle(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_fattr(xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr, userns);
 out:
 	return error;
 }
 
-static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result,
+		struct user_namespace *userns)
 {
 	enum nfs_stat status;
 	int error;
@@ -520,7 +539,7 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_diropok(xdr, result);
+	error = decode_diropok(xdr, result, userns);
 out:
 	return error;
 out_default:
@@ -559,7 +578,7 @@ static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
 	const struct nfs_sattrargs *args = data;
 
 	encode_fhandle(xdr, args->fh);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
@@ -674,7 +693,7 @@ static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
 	const struct nfs_createargs *args = data;
 
 	encode_diropargs(xdr, args->fh, args->name, args->len);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
@@ -741,7 +760,7 @@ static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 
 	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_path(xdr, args->pages, args->pathlen);
-	encode_sattr(xdr, args->sattr);
+	encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -803,13 +822,13 @@ static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *result)
 {
-	return decode_attrstat(xdr, result, NULL);
+	return decode_attrstat(xdr, result, NULL, rpc_rqst_userns(req));
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *result)
 {
-	return decode_diropres(xdr, result);
+	return decode_diropres(xdr, result, rpc_rqst_userns(req));
 }
 
 /*
@@ -864,7 +883,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 	result->op_status = status;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr, rpc_rqst_userns(req));
 	if (unlikely(error))
 		goto out;
 	error = decode_nfsdata(xdr, result);
@@ -881,7 +900,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
-	return decode_attrstat(xdr, result->fattr, &result->op_status);
+	return decode_attrstat(xdr, result->fattr, &result->op_status,
+			rpc_rqst_userns(req));
 }
 
 /**
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 8/9] NFS: When mounting, don't share filesystems between different user namespaces
  2019-04-24 21:46             ` [PATCH 7/9] NFS: Convert NFSv2 " Trond Myklebust
@ 2019-04-24 21:46               ` Trond Myklebust
  2019-04-24 21:46                 ` [PATCH 9/9] lockd: Store the lockd client credential in struct nlm_host Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

If two different containers that share the same network namespace attempt
to mount the same filesystem, we should not allow them to share the same
super block if they do not share the same user namespace, since the
user mappings on the wire will need to differ.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/super.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c27ac96a95bd..1730abc1e9ed 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2476,6 +2476,21 @@ static int nfs_compare_super_address(struct nfs_server *server1,
 	return 1;
 }
 
+static int nfs_compare_userns(const struct nfs_server *old,
+		const struct nfs_server *new)
+{
+	const struct user_namespace *oldns = &init_user_ns;
+	const struct user_namespace *newns = &init_user_ns;
+
+	if (old->client && old->client->cl_cred)
+		oldns = old->client->cl_cred->user_ns;
+	if (new->client && new->client->cl_cred)
+		newns = new->client->cl_cred->user_ns;
+	if (oldns != newns)
+		return 0;
+	return 1;
+}
+
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
 	struct nfs_sb_mountdata *sb_mntdata = data;
@@ -2489,6 +2504,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 		return 0;
 	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
+	if (!nfs_compare_userns(old, server))
+		return 0;
 	return nfs_compare_mount_options(sb, server, mntflags);
 }
 
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 9/9] lockd: Store the lockd client credential in struct nlm_host
  2019-04-24 21:46               ` [PATCH 8/9] NFS: When mounting, don't share filesystems between different user namespaces Trond Myklebust
@ 2019-04-24 21:46                 ` Trond Myklebust
  0 siblings, 0 replies; 19+ messages in thread
From: Trond Myklebust @ 2019-04-24 21:46 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: linux-nfs

When we create a new lockd client, we want to be able to pass the
correct credential of the process that created the struct nlm_host.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/lockd/clntlock.c         |  2 +-
 fs/lockd/host.c             | 11 +++++++++--
 include/linux/lockd/bind.h  |  1 +
 include/linux/lockd/lockd.h |  4 +++-
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index c2a128678e6e..70f520b41a19 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 	host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
 				   nlm_init->protocol, nlm_version,
 				   nlm_init->hostname, nlm_init->noresvport,
-				   nlm_init->net);
+				   nlm_init->net, nlm_init->cred);
 	if (host == NULL)
 		goto out_nohost;
 	if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index d46081123f7c..7d46fafdbbe5 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -60,6 +60,7 @@ struct nlm_lookup_host_info {
 	const size_t		hostname_len;	/* it's length */
 	const int		noresvport;	/* use non-priv port */
 	struct net		*net;		/* network namespace to bind */
+	const struct cred	*cred;
 };
 
 /*
@@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 	host->h_nsmhandle  = nsm;
 	host->h_addrbuf    = nsm->sm_addrbuf;
 	host->net	   = ni->net;
+	host->h_cred	   = get_cred(ni->cred),
 	strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
@@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
 	clnt = host->h_rpcclnt;
 	if (clnt != NULL)
 		rpc_shutdown_client(clnt);
+	put_cred(host->h_cred);
 	kfree(host);
 
 	ln->nrhosts--;
@@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
  * @version: NLM protocol version
  * @hostname: '\0'-terminated hostname of server
  * @noresvport: 1 if non-privileged port should be used
+ * @net: pointer to net namespace
+ * @cred: pointer to cred
  *
  * Returns an nlm_host structure that matches the passed-in
  * [server address, transport protocol, NLM version, server hostname].
@@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const u32 version,
 				     const char *hostname,
 				     int noresvport,
-				     struct net *net)
+				     struct net *net,
+				     const struct cred *cred)
 {
 	struct nlm_lookup_host_info ni = {
 		.server		= 0,
@@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.noresvport	= noresvport,
 		.net		= net,
+		.cred		= cred,
 	};
 	struct hlist_head *chain;
 	struct nlm_host	*host;
@@ -458,7 +465,7 @@ nlm_bind_host(struct nlm_host *host)
 			.authflavor	= RPC_AUTH_UNIX,
 			.flags		= (RPC_CLNT_CREATE_NOPING |
 					   RPC_CLNT_CREATE_AUTOBIND),
-			.cred		= current_cred(),
+			.cred		= host->h_cred,
 		};
 
 		/*
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 053a4ef3d431..8c0cf1059443 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -46,6 +46,7 @@ struct nlmclnt_initdata {
 	int			noresvport;
 	struct net		*net;
 	const struct nlmclnt_operations	*nlmclnt_ops;
+	const struct cred	*cred;
 };
 
 /*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b065ef406770..c9b422dde542 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -70,6 +70,7 @@ struct nlm_host {
 	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
 	char			*h_addrbuf;	/* address eyecatcher */
 	struct net		*net;		/* host net */
+	const struct cred	*h_cred;
 	char			nodename[UNX_MAXNODENAME + 1];
 	const struct nlmclnt_operations	*h_nlmclnt_ops;	/* Callback ops for NLM users */
 };
@@ -229,7 +230,8 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
 					const u32 version,
 					const char *hostname,
 					int noresvport,
-					struct net *net);
+					struct net *net,
+					const struct cred *cred);
 void		  nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
-- 
2.21.0


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-24 21:46           ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper " Trond Myklebust
  2019-04-24 21:46             ` [PATCH 7/9] NFS: Convert NFSv2 " Trond Myklebust
@ 2019-04-25 14:32             ` bfields
  2019-04-25 15:00               ` Trond Myklebust
  1 sibling, 1 reply; 19+ messages in thread
From: bfields @ 2019-04-25 14:32 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Anna Schumaker, linux-nfs

On Wed, Apr 24, 2019 at 05:46:47PM -0400, Trond Myklebust wrote:
> When mapping NFS identities using the NFSv4 idmapper, we want to substitute
> for the uids and gids that would normally go on the wire as part of a
> NFSv3 request. So we use the same mapping in the NFSv4 upcall as we
> use in the NFSv3 RPC call (i.e. the mapping stored in the rpc_clnt cred).

I'm a little lost.  Do I have it right that the following id's are all
the same?:

	- id's used on the wire
	- id's used in NFSv4 idmapper upcalls
	- id's used in the namespace of the mounting process

And is it assumed that those are all in one namespace?  So different
containers can't use different on-the-wire id's?

--b.

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/nfs4idmap.c | 27 ++++++++++++++++++++-------
>  1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
> index bf34ddaa2ad7..4884fdae28fb 100644
> --- a/fs/nfs/nfs4idmap.c
> +++ b/fs/nfs/nfs4idmap.c
> @@ -69,8 +69,16 @@ struct idmap {
>  	struct rpc_pipe		*idmap_pipe;
>  	struct idmap_legacy_upcalldata *idmap_upcall_data;
>  	struct mutex		idmap_mutex;
> +	const struct cred	*cred;
>  };
>  
> +static struct user_namespace *idmap_userns(const struct idmap *idmap)
> +{
> +	if (idmap && idmap->cred)
> +		return idmap->cred->user_ns;
> +	return &init_user_ns;
> +}
> +
>  /**
>   * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
>   * @fattr: fully initialised struct nfs_fattr
> @@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
>  					 const char *type, struct idmap *idmap)
>  {
>  	char *desc;
> -	struct key *rkey;
> +	struct key *rkey = ERR_PTR(-EAGAIN);
>  	ssize_t ret;
>  
>  	ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
>  	if (ret < 0)
>  		return ERR_PTR(ret);
>  
> -	rkey = request_key(&key_type_id_resolver, desc, "");
> +	if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
> +		rkey = request_key(&key_type_id_resolver, desc, "");
>  	if (IS_ERR(rkey)) {
>  		mutex_lock(&idmap->idmap_mutex);
>  		rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
> @@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
>  	if (idmap == NULL)
>  		return -ENOMEM;
>  
> +	mutex_init(&idmap->idmap_mutex);
> +	idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
> +
>  	rpc_init_pipe_dir_object(&idmap->idmap_pdo,
>  			&nfs_idmap_pipe_dir_object_ops,
>  			idmap);
> @@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
>  		goto err;
>  	}
>  	idmap->idmap_pipe = pipe;
> -	mutex_init(&idmap->idmap_mutex);
>  
>  	error = rpc_add_pipe_dir_object(clp->cl_net,
>  			&clp->cl_rpcclient->cl_pipedir_objects,
> @@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
>  err_destroy_pipe:
>  	rpc_destroy_pipe_data(idmap->idmap_pipe);
>  err:
> +	put_cred(idmap->cred);
>  	kfree(idmap);
>  	return error;
>  }
> @@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
>  			&clp->cl_rpcclient->cl_pipedir_objects,
>  			&idmap->idmap_pdo);
>  	rpc_destroy_pipe_data(idmap->idmap_pipe);
> +	put_cred(idmap->cred);
>  	kfree(idmap);
>  }
>  
> @@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
>  	if (!nfs_map_string_to_numeric(name, namelen, &id))
>  		ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
>  	if (ret == 0) {
> -		*uid = make_kuid(&init_user_ns, id);
> +		*uid = make_kuid(idmap_userns(idmap), id);
>  		if (!uid_valid(*uid))
>  			ret = -ERANGE;
>  	}
> @@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
>  	if (!nfs_map_string_to_numeric(name, namelen, &id))
>  		ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
>  	if (ret == 0) {
> -		*gid = make_kgid(&init_user_ns, id);
> +		*gid = make_kgid(idmap_userns(idmap), id);
>  		if (!gid_valid(*gid))
>  			ret = -ERANGE;
>  	}
> @@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
>  	int ret = -EINVAL;
>  	__u32 id;
>  
> -	id = from_kuid(&init_user_ns, uid);
> +	id = from_kuid_munged(idmap_userns(idmap), uid);
>  	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
>  		ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
>  	if (ret < 0)
> @@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
>  	int ret = -EINVAL;
>  	__u32 id;
>  
> -	id = from_kgid(&init_user_ns, gid);
> +	id = from_kgid_munged(idmap_userns(idmap), gid);
>  	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
>  		ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
>  	if (ret < 0)
> -- 
> 2.21.0

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 14:32             ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace bfields
@ 2019-04-25 15:00               ` Trond Myklebust
  2019-04-25 15:33                 ` bfields
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-25 15:00 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs, Anna.Schumaker

On Thu, 2019-04-25 at 10:32 -0400, J. Bruce Fields wrote:
> On Wed, Apr 24, 2019 at 05:46:47PM -0400, Trond Myklebust wrote:
> > When mapping NFS identities using the NFSv4 idmapper, we want to
> > substitute
> > for the uids and gids that would normally go on the wire as part of
> > a
> > NFSv3 request. So we use the same mapping in the NFSv4 upcall as we
> > use in the NFSv3 RPC call (i.e. the mapping stored in the rpc_clnt
> > cred).
> 
> I'm a little lost.  Do I have it right that the following id's are
> all
> the same?:
> 
> 	- id's used on the wire
> 	- id's used in NFSv4 idmapper upcalls
> 	- id's used in the namespace of the mounting process
> 
> And is it assumed that those are all in one namespace?  So different
> containers can't use different on-the-wire id's?

The assumption is that if you have enough privileges to mount a
filesystem using the NFS client, then you would also have enough
privileges to run a userspace client, so there is little point in
restricting the NFS client.

So the guiding principle is that a NFS client mount that is started in
a container should behave as if it were started by a process in a "real
VM". That means that the root uid/gid in the container maps to a root
uid/gid on the wire.
Ditto, if there is a need to run the idmapper in the container, then
the expectation is that processes running as 'user' with uid 'u', will
see their creds mapped correctly by the idmapper. Again, that's what
you would see if the process were running in a VM instead of a
container.

Does that all make sense?

So this means that any orchestrator software which may be setting up
NFS mounts as part of setting up the container has 2 options:

   1. Either perform the mount outside the container namespaces, in which
      case the container process uids/gids are mapped from their
      user_namespace into the user_namespace of the orchestrator, and the
      uids/gids on the wire will reflect those mapped uids/gids (so uid 0
      in the container will be mapped to uid xxx where xxx is decided by
      the orchestrator).
   2. Perform the mount inside the container namespaces, in which case the
      container process uids/gids go on the wire as-is.

Cheers
  Trond

> 
> --b.
> 
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/nfs4idmap.c | 27 ++++++++++++++++++++-------
> >  1 file changed, 20 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
> > index bf34ddaa2ad7..4884fdae28fb 100644
> > --- a/fs/nfs/nfs4idmap.c
> > +++ b/fs/nfs/nfs4idmap.c
> > @@ -69,8 +69,16 @@ struct idmap {
> >  	struct rpc_pipe		*idmap_pipe;
> >  	struct idmap_legacy_upcalldata *idmap_upcall_data;
> >  	struct mutex		idmap_mutex;
> > +	const struct cred	*cred;
> >  };
> >  
> > +static struct user_namespace *idmap_userns(const struct idmap
> > *idmap)
> > +{
> > +	if (idmap && idmap->cred)
> > +		return idmap->cred->user_ns;
> > +	return &init_user_ns;
> > +}
> > +
> >  /**
> >   * nfs_fattr_init_names - initialise the nfs_fattr
> > owner_name/group_name fields
> >   * @fattr: fully initialised struct nfs_fattr
> > @@ -271,14 +279,15 @@ static struct key
> > *nfs_idmap_request_key(const char *name, size_t namelen,
> >  					 const char *type, struct idmap
> > *idmap)
> >  {
> >  	char *desc;
> > -	struct key *rkey;
> > +	struct key *rkey = ERR_PTR(-EAGAIN);
> >  	ssize_t ret;
> >  
> >  	ret = nfs_idmap_get_desc(name, namelen, type, strlen(type),
> > &desc);
> >  	if (ret < 0)
> >  		return ERR_PTR(ret);
> >  
> > -	rkey = request_key(&key_type_id_resolver, desc, "");
> > +	if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
> > +		rkey = request_key(&key_type_id_resolver, desc, "");
> >  	if (IS_ERR(rkey)) {
> >  		mutex_lock(&idmap->idmap_mutex);
> >  		rkey =
> > request_key_with_auxdata(&key_type_id_resolver_legacy,
> > @@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
> >  	if (idmap == NULL)
> >  		return -ENOMEM;
> >  
> > +	mutex_init(&idmap->idmap_mutex);
> > +	idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
> > +
> >  	rpc_init_pipe_dir_object(&idmap->idmap_pdo,
> >  			&nfs_idmap_pipe_dir_object_ops,
> >  			idmap);
> > @@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
> >  		goto err;
> >  	}
> >  	idmap->idmap_pipe = pipe;
> > -	mutex_init(&idmap->idmap_mutex);
> >  
> >  	error = rpc_add_pipe_dir_object(clp->cl_net,
> >  			&clp->cl_rpcclient->cl_pipedir_objects,
> > @@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
> >  err_destroy_pipe:
> >  	rpc_destroy_pipe_data(idmap->idmap_pipe);
> >  err:
> > +	put_cred(idmap->cred);
> >  	kfree(idmap);
> >  	return error;
> >  }
> > @@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
> >  			&clp->cl_rpcclient->cl_pipedir_objects,
> >  			&idmap->idmap_pdo);
> >  	rpc_destroy_pipe_data(idmap->idmap_pipe);
> > +	put_cred(idmap->cred);
> >  	kfree(idmap);
> >  }
> >  
> > @@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server
> > *server, const char *name, size_
> >  	if (!nfs_map_string_to_numeric(name, namelen, &id))
> >  		ret = nfs_idmap_lookup_id(name, namelen, "uid", &id,
> > idmap);
> >  	if (ret == 0) {
> > -		*uid = make_kuid(&init_user_ns, id);
> > +		*uid = make_kuid(idmap_userns(idmap), id);
> >  		if (!uid_valid(*uid))
> >  			ret = -ERANGE;
> >  	}
> > @@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct
> > nfs_server *server, const char *name, size
> >  	if (!nfs_map_string_to_numeric(name, namelen, &id))
> >  		ret = nfs_idmap_lookup_id(name, namelen, "gid", &id,
> > idmap);
> >  	if (ret == 0) {
> > -		*gid = make_kgid(&init_user_ns, id);
> > +		*gid = make_kgid(idmap_userns(idmap), id);
> >  		if (!gid_valid(*gid))
> >  			ret = -ERANGE;
> >  	}
> > @@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server
> > *server, kuid_t uid, char *buf,
> >  	int ret = -EINVAL;
> >  	__u32 id;
> >  
> > -	id = from_kuid(&init_user_ns, uid);
> > +	id = from_kuid_munged(idmap_userns(idmap), uid);
> >  	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
> >  		ret = nfs_idmap_lookup_name(id, "user", buf, buflen,
> > idmap);
> >  	if (ret < 0)
> > @@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct
> > nfs_server *server, kgid_t gid, char *buf,
> >  	int ret = -EINVAL;
> >  	__u32 id;
> >  
> > -	id = from_kgid(&init_user_ns, gid);
> > +	id = from_kgid_munged(idmap_userns(idmap), gid);
> >  	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
> >  		ret = nfs_idmap_lookup_name(id, "group", buf, buflen,
> > idmap);
> >  	if (ret < 0)
> > -- 
> > 2.21.0
-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 15:00               ` Trond Myklebust
@ 2019-04-25 15:33                 ` bfields
  2019-04-25 16:40                   ` Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: bfields @ 2019-04-25 15:33 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, Anna.Schumaker

On Thu, Apr 25, 2019 at 03:00:22PM +0000, Trond Myklebust wrote:
> The assumption is that if you have enough privileges to mount a
> filesystem using the NFS client, then you would also have enough
> privileges to run a userspace client, so there is little point in
> restricting the NFS client.
> 
> So the guiding principle is that a NFS client mount that is started in
> a container should behave as if it were started by a process in a "real
> VM". That means that the root uid/gid in the container maps to a root
> uid/gid on the wire.
> Ditto, if there is a need to run the idmapper in the container, then
> the expectation is that processes running as 'user' with uid 'u', will
> see their creds mapped correctly by the idmapper. Again, that's what
> you would see if the process were running in a VM instead of a
> container.
> 
> Does that all make sense?

Yes, thanks!

I thought there was a problem that the idmapper depended on
keyring usermodehelper calls that it was hard to pass namespace
information to.  Did that get fixed and I missed it or or forgot?

> 
> So this means that any orchestrator software which may be setting up
> NFS mounts as part of setting up the container has 2 options:
> 
>    1. Either perform the mount outside the container namespaces, in which
>       case the container process uids/gids are mapped from their
>       user_namespace into the user_namespace of the orchestrator, and the
>       uids/gids on the wire will reflect those mapped uids/gids (so uid 0
>       in the container will be mapped to uid xxx where xxx is decided by
>       the orchestrator).
>    2. Perform the mount inside the container namespaces, in which case the
>       container process uids/gids go on the wire as-is.

OK, great, that sounds perfect.

--b.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 15:33                 ` bfields
@ 2019-04-25 16:40                   ` Trond Myklebust
  2019-04-25 16:45                     ` bfields
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-25 16:40 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs, Anna.Schumaker

On Thu, 2019-04-25 at 11:33 -0400, bfields@fieldses.org wrote:
> On Thu, Apr 25, 2019 at 03:00:22PM +0000, Trond Myklebust wrote:
> > The assumption is that if you have enough privileges to mount a
> > filesystem using the NFS client, then you would also have enough
> > privileges to run a userspace client, so there is little point in
> > restricting the NFS client.
> > 
> > So the guiding principle is that a NFS client mount that is started
> > in
> > a container should behave as if it were started by a process in a
> > "real
> > VM". That means that the root uid/gid in the container maps to a
> > root
> > uid/gid on the wire.
> > Ditto, if there is a need to run the idmapper in the container,
> > then
> > the expectation is that processes running as 'user' with uid 'u',
> > will
> > see their creds mapped correctly by the idmapper. Again, that's
> > what
> > you would see if the process were running in a VM instead of a
> > container.
> > 
> > Does that all make sense?
> 
> Yes, thanks!
> 
> I thought there was a problem that the idmapper depended on
> keyring usermodehelper calls that it was hard to pass namespace
> information to.  Did that get fixed and I missed it or or forgot?

No, you are correct, and so we assume the container is using rpc.idmapd
(and rpc.gssd) if it is running NFSv4 with RPCSEC_GSS.

If the keyring upcall gets fixed, then we may allow it to be used in
future kernels.

> > So this means that any orchestrator software which may be setting
> > up
> > NFS mounts as part of setting up the container has 2 options:
> > 
> >    1. Either perform the mount outside the container namespaces, in
> > which
> >       case the container process uids/gids are mapped from their
> >       user_namespace into the user_namespace of the orchestrator,
> > and the
> >       uids/gids on the wire will reflect those mapped uids/gids (so
> > uid 0
> >       in the container will be mapped to uid xxx where xxx is
> > decided by
> >       the orchestrator).
> >    2. Perform the mount inside the container namespaces, in which
> > case the
> >       container process uids/gids go on the wire as-is.
> 
> OK, great, that sounds perfect.
> 
> --b.
-- 
Trond Myklebust
CTO, Hammerspace Inc
4300 El Camino Real, Suite 105
Los Altos, CA 94022
www.hammer.space



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 16:40                   ` Trond Myklebust
@ 2019-04-25 16:45                     ` bfields
  2019-04-25 16:48                       ` Trond Myklebust
  0 siblings, 1 reply; 19+ messages in thread
From: bfields @ 2019-04-25 16:45 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, Anna.Schumaker

On Thu, Apr 25, 2019 at 04:40:18PM +0000, Trond Myklebust wrote:
> On Thu, 2019-04-25 at 11:33 -0400, bfields@fieldses.org wrote:
> > On Thu, Apr 25, 2019 at 03:00:22PM +0000, Trond Myklebust wrote:
> > > The assumption is that if you have enough privileges to mount a
> > > filesystem using the NFS client, then you would also have enough
> > > privileges to run a userspace client, so there is little point in
> > > restricting the NFS client.
> > > 
> > > So the guiding principle is that a NFS client mount that is started
> > > in
> > > a container should behave as if it were started by a process in a
> > > "real
> > > VM". That means that the root uid/gid in the container maps to a
> > > root
> > > uid/gid on the wire.
> > > Ditto, if there is a need to run the idmapper in the container,
> > > then
> > > the expectation is that processes running as 'user' with uid 'u',
> > > will
> > > see their creds mapped correctly by the idmapper. Again, that's
> > > what
> > > you would see if the process were running in a VM instead of a
> > > container.
> > > 
> > > Does that all make sense?
> > 
> > Yes, thanks!
> > 
> > I thought there was a problem that the idmapper depended on
> > keyring usermodehelper calls that it was hard to pass namespace
> > information to.  Did that get fixed and I missed it or or forgot?
> 
> No, you are correct, and so we assume the container is using rpc.idmapd
> (and rpc.gssd) if it is running NFSv4 with RPCSEC_GSS.
> 
> If the keyring upcall gets fixed, then we may allow it to be used in
> future kernels.

OK, got it.  Is there anything we lose by using idmapd?  (IDMAP_NAMESZ
might be a limitation?)

--b.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 16:45                     ` bfields
@ 2019-04-25 16:48                       ` Trond Myklebust
  2019-04-25 20:16                         ` bfields
  0 siblings, 1 reply; 19+ messages in thread
From: Trond Myklebust @ 2019-04-25 16:48 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs, Anna.Schumaker

On Thu, 2019-04-25 at 12:45 -0400, bfields@fieldses.org wrote:
> On Thu, Apr 25, 2019 at 04:40:18PM +0000, Trond Myklebust wrote:
> > On Thu, 2019-04-25 at 11:33 -0400, bfields@fieldses.org wrote:
> > > On Thu, Apr 25, 2019 at 03:00:22PM +0000, Trond Myklebust wrote:
> > > > The assumption is that if you have enough privileges to mount a
> > > > filesystem using the NFS client, then you would also have
> > > > enough
> > > > privileges to run a userspace client, so there is little point
> > > > in
> > > > restricting the NFS client.
> > > > 
> > > > So the guiding principle is that a NFS client mount that is
> > > > started
> > > > in
> > > > a container should behave as if it were started by a process in
> > > > a
> > > > "real
> > > > VM". That means that the root uid/gid in the container maps to
> > > > a
> > > > root
> > > > uid/gid on the wire.
> > > > Ditto, if there is a need to run the idmapper in the container,
> > > > then
> > > > the expectation is that processes running as 'user' with uid
> > > > 'u',
> > > > will
> > > > see their creds mapped correctly by the idmapper. Again, that's
> > > > what
> > > > you would see if the process were running in a VM instead of a
> > > > container.
> > > > 
> > > > Does that all make sense?
> > > 
> > > Yes, thanks!
> > > 
> > > I thought there was a problem that the idmapper depended on
> > > keyring usermodehelper calls that it was hard to pass namespace
> > > information to.  Did that get fixed and I missed it or or forgot?
> > 
> > No, you are correct, and so we assume the container is using
> > rpc.idmapd
> > (and rpc.gssd) if it is running NFSv4 with RPCSEC_GSS.
> > 
> > If the keyring upcall gets fixed, then we may allow it to be used
> > in
> > future kernels.
> 
> OK, got it.  Is there anything we lose by using
> idmapd?  (IDMAP_NAMESZ
> might be a limitation?)

IDMAP_NAMESZ should be set to 128 by default, so should work for most
cases. I don't think there are any further limitations.

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace
  2019-04-25 16:48                       ` Trond Myklebust
@ 2019-04-25 20:16                         ` bfields
  0 siblings, 0 replies; 19+ messages in thread
From: bfields @ 2019-04-25 20:16 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, Anna.Schumaker

On Thu, Apr 25, 2019 at 04:48:54PM +0000, Trond Myklebust wrote:
> On Thu, 2019-04-25 at 12:45 -0400, bfields@fieldses.org wrote:
> > On Thu, Apr 25, 2019 at 04:40:18PM +0000, Trond Myklebust wrote:
> > > On Thu, 2019-04-25 at 11:33 -0400, bfields@fieldses.org wrote:
> > > > On Thu, Apr 25, 2019 at 03:00:22PM +0000, Trond Myklebust wrote:
> > > > > The assumption is that if you have enough privileges to mount a
> > > > > filesystem using the NFS client, then you would also have
> > > > > enough
> > > > > privileges to run a userspace client, so there is little point
> > > > > in
> > > > > restricting the NFS client.
> > > > > 
> > > > > So the guiding principle is that a NFS client mount that is
> > > > > started
> > > > > in
> > > > > a container should behave as if it were started by a process in
> > > > > a
> > > > > "real
> > > > > VM". That means that the root uid/gid in the container maps to
> > > > > a
> > > > > root
> > > > > uid/gid on the wire.
> > > > > Ditto, if there is a need to run the idmapper in the container,
> > > > > then
> > > > > the expectation is that processes running as 'user' with uid
> > > > > 'u',
> > > > > will
> > > > > see their creds mapped correctly by the idmapper. Again, that's
> > > > > what
> > > > > you would see if the process were running in a VM instead of a
> > > > > container.
> > > > > 
> > > > > Does that all make sense?
> > > > 
> > > > Yes, thanks!
> > > > 
> > > > I thought there was a problem that the idmapper depended on
> > > > keyring usermodehelper calls that it was hard to pass namespace
> > > > information to.  Did that get fixed and I missed it or or forgot?
> > > 
> > > No, you are correct, and so we assume the container is using
> > > rpc.idmapd
> > > (and rpc.gssd) if it is running NFSv4 with RPCSEC_GSS.
> > > 
> > > If the keyring upcall gets fixed, then we may allow it to be used
> > > in
> > > future kernels.
> > 
> > OK, got it.  Is there anything we lose by using
> > idmapd?  (IDMAP_NAMESZ
> > might be a limitation?)
> 
> IDMAP_NAMESZ should be set to 128 by default, so should work for most
> cases. I don't think there are any further limitations.

Makes sense.  Looks good to me!

--b.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client
  2019-04-24 21:46 ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Trond Myklebust
  2019-04-24 21:46   ` [PATCH 2/9] NFS: Store the credential of the mount process in the nfs_server Trond Myklebust
@ 2019-06-14 18:52   ` Ido Schimmel
  2019-06-20 12:33     ` Ido Schimmel
  1 sibling, 1 reply; 19+ messages in thread
From: Ido Schimmel @ 2019-06-14 18:52 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Anna Schumaker, linux-nfs

On Wed, Apr 24, 2019 at 05:46:42PM -0400, Trond Myklebust wrote:
> When converting kuids to AUTH_UNIX creds, etc we will want to use the
> same user namespace as the process that created the rpc client.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

Hi,

Since upgrading to v5.2-rc1 I started encountering these memory leaks
[1]. Bisection using this reproducer [2] points to this patch. Attached
the full bisection log [3].

Please let me know if more information is required.

Thanks

[1]
unreferenced object 0xffffa35dfaea3000 (size 192):
  comm "mount", pid 428, jiffies 4294703475 (age 7.578s)
  hex dump (first 32 bytes):
    03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000b43bed74>] prepare_exec_creds+0x6/0x40
    [<00000000422eb980>] __do_execve_file.isra.56+0x124/0x930
    [<00000000aa848639>] __x64_sys_execve+0x2f/0x40
    [<000000008d5c43e1>] do_syscall_64+0x43/0xf0
    [<0000000068b03b0e>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
    [<000000001fab781f>] 0xffffffffffffffff

[2]
#!/bin/bash

umount /mnt/share155
mount -t nfs <nfs-server>:/images/share /mnt/share155
echo scan > /sys/kernel/debug/kmemleak
cat /sys/kernel/debug/kmemleak

[3]
git bisect start
# good: [e93c9c99a629c61837d5a7fc2120cd2b6c70dbdd] Linux 5.1
git bisect good e93c9c99a629c61837d5a7fc2120cd2b6c70dbdd
# bad: [a188339ca5a396acc588e5851ed7e19f66b0ebd9] Linux 5.2-rc1
git bisect bad a188339ca5a396acc588e5851ed7e19f66b0ebd9
# good: [2646719a48c21ba0cae82a3f57382a9573fd8400] Merge tag 'kbuild-v5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild
git bisect good 2646719a48c21ba0cae82a3f57382a9573fd8400
# bad: [b970afcfcabd63cd3832e95db096439c177c3592] Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
git bisect bad b970afcfcabd63cd3832e95db096439c177c3592
# good: [eb85d03e01c3e9f3b0ba7282b2e3515a635decb2] Merge tag 'drm-misc-next-fixes-2019-05-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
git bisect good eb85d03e01c3e9f3b0ba7282b2e3515a635decb2
# good: [dce45af5c2e9e85f22578f2f8065f225f5d11764] Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
git bisect good dce45af5c2e9e85f22578f2f8065f225f5d11764
# bad: [8e4ff713ce313dcabbb60e6ede1ffc193e67631f] Merge tag 'rtc-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux
git bisect bad 8e4ff713ce313dcabbb60e6ede1ffc193e67631f
# bad: [45182e4e1f8ac04708ca7508c51d9103f07d81ab] Merge branch 'i2c/for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
git bisect bad 45182e4e1f8ac04708ca7508c51d9103f07d81ab
# bad: [5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e] SUNRPC: Rebalance a kref in auth_gss.c
git bisect bad 5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e
# good: [23146500b32fbee7eaa57c5002fcd64e5d9b32ca] xprtrdma: Clean up rpcrdma_create_rep() and rpcrdma_destroy_rep()
git bisect good 23146500b32fbee7eaa57c5002fcd64e5d9b32ca
# good: [2cfd11f16f01c0ee8f83bb07027c9d2f43565473] xprtrdma: Remove stale comment
git bisect good 2cfd11f16f01c0ee8f83bb07027c9d2f43565473
# bad: [b422df915cef80333d7a1732e6ed81f41db12b79] lockd: Store the lockd client credential in struct nlm_host
git bisect bad b422df915cef80333d7a1732e6ed81f41db12b79
# bad: [ac83228a7101e655ba5a7fa61ae10b058ada15db] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
git bisect bad ac83228a7101e655ba5a7fa61ae10b058ada15db
# bad: [1a58e8a0e5c1f188a80eb9e505bc77d78a31a4ec] NFS: Store the credential of the mount process in the nfs_server
git bisect bad 1a58e8a0e5c1f188a80eb9e505bc77d78a31a4ec
# bad: [79caa5fad47c69874f9efc4ac3128cc3f6d36f6e] SUNRPC: Cache cred of process creating the rpc_client
git bisect bad 79caa5fad47c69874f9efc4ac3128cc3f6d36f6e
# first bad commit: [79caa5fad47c69874f9efc4ac3128cc3f6d36f6e] SUNRPC: Cache cred of process creating the rpc_client

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client
  2019-06-14 18:52   ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Ido Schimmel
@ 2019-06-20 12:33     ` Ido Schimmel
  0 siblings, 0 replies; 19+ messages in thread
From: Ido Schimmel @ 2019-06-20 12:33 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Anna Schumaker, linux-nfs

On Fri, Jun 14, 2019 at 09:52:37PM +0300, Ido Schimmel wrote:
> On Wed, Apr 24, 2019 at 05:46:42PM -0400, Trond Myklebust wrote:
> > When converting kuids to AUTH_UNIX creds, etc we will want to use the
> > same user namespace as the process that created the rpc client.
> > 
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> Hi,
> 
> Since upgrading to v5.2-rc1 I started encountering these memory leaks
> [1]. Bisection using this reproducer [2] points to this patch. Attached
> the full bisection log [3].
> 
> Please let me know if more information is required.

Ping?

> 
> Thanks
> 
> [1]
> unreferenced object 0xffffa35dfaea3000 (size 192):
>   comm "mount", pid 428, jiffies 4294703475 (age 7.578s)
>   hex dump (first 32 bytes):
>     03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>   backtrace:
>     [<00000000b43bed74>] prepare_exec_creds+0x6/0x40
>     [<00000000422eb980>] __do_execve_file.isra.56+0x124/0x930
>     [<00000000aa848639>] __x64_sys_execve+0x2f/0x40
>     [<000000008d5c43e1>] do_syscall_64+0x43/0xf0
>     [<0000000068b03b0e>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
>     [<000000001fab781f>] 0xffffffffffffffff
> 
> [2]
> #!/bin/bash
> 
> umount /mnt/share155
> mount -t nfs <nfs-server>:/images/share /mnt/share155
> echo scan > /sys/kernel/debug/kmemleak
> cat /sys/kernel/debug/kmemleak
> 
> [3]
> git bisect start
> # good: [e93c9c99a629c61837d5a7fc2120cd2b6c70dbdd] Linux 5.1
> git bisect good e93c9c99a629c61837d5a7fc2120cd2b6c70dbdd
> # bad: [a188339ca5a396acc588e5851ed7e19f66b0ebd9] Linux 5.2-rc1
> git bisect bad a188339ca5a396acc588e5851ed7e19f66b0ebd9
> # good: [2646719a48c21ba0cae82a3f57382a9573fd8400] Merge tag 'kbuild-v5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild
> git bisect good 2646719a48c21ba0cae82a3f57382a9573fd8400
> # bad: [b970afcfcabd63cd3832e95db096439c177c3592] Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
> git bisect bad b970afcfcabd63cd3832e95db096439c177c3592
> # good: [eb85d03e01c3e9f3b0ba7282b2e3515a635decb2] Merge tag 'drm-misc-next-fixes-2019-05-08' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
> git bisect good eb85d03e01c3e9f3b0ba7282b2e3515a635decb2
> # good: [dce45af5c2e9e85f22578f2f8065f225f5d11764] Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
> git bisect good dce45af5c2e9e85f22578f2f8065f225f5d11764
> # bad: [8e4ff713ce313dcabbb60e6ede1ffc193e67631f] Merge tag 'rtc-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux
> git bisect bad 8e4ff713ce313dcabbb60e6ede1ffc193e67631f
> # bad: [45182e4e1f8ac04708ca7508c51d9103f07d81ab] Merge branch 'i2c/for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
> git bisect bad 45182e4e1f8ac04708ca7508c51d9103f07d81ab
> # bad: [5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e] SUNRPC: Rebalance a kref in auth_gss.c
> git bisect bad 5940d1cf9f42f67e9cc3f7df9eda39f5888d6e9e
> # good: [23146500b32fbee7eaa57c5002fcd64e5d9b32ca] xprtrdma: Clean up rpcrdma_create_rep() and rpcrdma_destroy_rep()
> git bisect good 23146500b32fbee7eaa57c5002fcd64e5d9b32ca
> # good: [2cfd11f16f01c0ee8f83bb07027c9d2f43565473] xprtrdma: Remove stale comment
> git bisect good 2cfd11f16f01c0ee8f83bb07027c9d2f43565473
> # bad: [b422df915cef80333d7a1732e6ed81f41db12b79] lockd: Store the lockd client credential in struct nlm_host
> git bisect bad b422df915cef80333d7a1732e6ed81f41db12b79
> # bad: [ac83228a7101e655ba5a7fa61ae10b058ada15db] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
> git bisect bad ac83228a7101e655ba5a7fa61ae10b058ada15db
> # bad: [1a58e8a0e5c1f188a80eb9e505bc77d78a31a4ec] NFS: Store the credential of the mount process in the nfs_server
> git bisect bad 1a58e8a0e5c1f188a80eb9e505bc77d78a31a4ec
> # bad: [79caa5fad47c69874f9efc4ac3128cc3f6d36f6e] SUNRPC: Cache cred of process creating the rpc_client
> git bisect bad 79caa5fad47c69874f9efc4ac3128cc3f6d36f6e
> # first bad commit: [79caa5fad47c69874f9efc4ac3128cc3f6d36f6e] SUNRPC: Cache cred of process creating the rpc_client

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, back to index

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-24 21:46 [PATCH 0/9] Client container fixes Trond Myklebust
2019-04-24 21:46 ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Trond Myklebust
2019-04-24 21:46   ` [PATCH 2/9] NFS: Store the credential of the mount process in the nfs_server Trond Myklebust
2019-04-24 21:46     ` [PATCH 3/9] SUNRPC: Use the client user namespace when encoding creds Trond Myklebust
2019-04-24 21:46       ` [PATCH 4/9] SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall Trond Myklebust
2019-04-24 21:46         ` [PATCH 5/9] NFS: Convert NFSv3 to use the container user namespace Trond Myklebust
2019-04-24 21:46           ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper " Trond Myklebust
2019-04-24 21:46             ` [PATCH 7/9] NFS: Convert NFSv2 " Trond Myklebust
2019-04-24 21:46               ` [PATCH 8/9] NFS: When mounting, don't share filesystems between different user namespaces Trond Myklebust
2019-04-24 21:46                 ` [PATCH 9/9] lockd: Store the lockd client credential in struct nlm_host Trond Myklebust
2019-04-25 14:32             ` [PATCH 6/9] NFSv4: Convert the NFS client idmapper to use the container user namespace bfields
2019-04-25 15:00               ` Trond Myklebust
2019-04-25 15:33                 ` bfields
2019-04-25 16:40                   ` Trond Myklebust
2019-04-25 16:45                     ` bfields
2019-04-25 16:48                       ` Trond Myklebust
2019-04-25 20:16                         ` bfields
2019-06-14 18:52   ` [PATCH 1/9] SUNRPC: Cache cred of process creating the rpc_client Ido Schimmel
2019-06-20 12:33     ` Ido Schimmel

Linux-NFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nfs/0 linux-nfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nfs linux-nfs/ https://lore.kernel.org/linux-nfs \
		linux-nfs@vger.kernel.org linux-nfs@archiver.kernel.org
	public-inbox-index linux-nfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-nfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox