* [RFC][PATCH] Improve NFS use of network and mount namespaces
From: Matt Helsley @ 2009-05-12 21:51 UTC
  To: Containers, linux-nfs; +Cc: Eric Biederman


Sun RPC currently opens sockets from the initial network namespace, making it
impossible to restrict which NFS servers a container may interact with.

For example, the NFS server at 10.0.0.3 reachable from the initial namespace
will always be used even if an entirely different server with the address
10.0.0.3 is reachable from a container's network namespace. Hence network
namespaces cannot be used to restrict the network access of a container as
long as the RPC code opens sockets in the initial network namespace. This is
in stark contrast to protocols such as HTTP, where sockets are created in the
proper namespace because kernel threads are not used to open sockets for
client network I/O.
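
For reference, this is the 2.6.29 helper that pins every kernel-created
socket to the initial namespace (from net/socket.c; it also appears as
context in the patch below):

	/* All kernel sockets land in init_net, regardless of which
	 * network namespace the calling task belongs to. */
	int sock_create_kern(int family, int type, int protocol,
			     struct socket **res)
	{
		return __sock_create(&init_net, family, type, protocol,
				     res, 1);
	}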

We may plausibly end up with such namespaces in two ways:

I) The administrator may mount 10.0.0.3:/export_foo from init's
container, clone the mount namespace, and unmount from the original
mount namespace.

II) The administrator may start a task which clones the mount namespace
before mounting 10.0.0.3:/export_foo (see the sketch below).
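
A minimal user-space sketch of case II (illustrative only: the export path
and mount options reuse the example values above, error handling is
simplified, and the task also unshares its network namespace, which is the
situation this patch targets):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Give this task its own mount (and network) namespace
		 * before mounting, as in case II. */
		if (unshare(CLONE_NEWNS | CLONE_NEWNET) < 0) {
			perror("unshare");
			return 1;
		}
		/* 10.0.0.3 is now looked up via the new namespace's
		 * interfaces and routes, but without this patch the RPC
		 * sockets would still be opened in init_net. */
		if (mount("10.0.0.3:/export_foo", "/mnt", "nfs", 0,
			  "addr=10.0.0.3") < 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}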

Proposed Solution:

The network namespace of the task that performed the mount best defines which
server the "administrator", whether in a container or not, expects to work
with. When the mount is done inside a container, that container's network
namespace is the one to use. When the mount is done prior to creating the
container, the original namespace is the one that should be used.

This allows system administrators to isolate network traffic generated by NFS
clients by mounting after creating a container. If partial isolation is
desired, the administrator may instead mount before creating a container with
a new network namespace. In either case the RPC packets would originate from
a consistent namespace.

One way to ensure consistent namespace usage is to hold a reference to the
original network namespace for as long as the mount exists. This naturally
suggests storing the network namespace reference in the NFS superblock.
However, it may be better to store it with the RPC transport itself, since
the transport is directly responsible for (re)opening the sockets.

This patch adds a reference to the network namespace to the RPC transport.
When the NFS export is mounted, the network namespace of the current task
determines which namespace to reference. That reference is stored in the RPC
transport and used whenever a new socket must be opened.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
---
 fs/nfs/client.c             |    5 ++++-
 include/linux/net.h         |    2 ++
 include/linux/sunrpc/clnt.h |    1 +
 include/linux/sunrpc/xprt.h |    1 +
 net/socket.c                |    5 +++++
 net/sunrpc/clnt.c           |    1 +
 net/sunrpc/xprtsock.c       |   26 ++++++++++++++++++++++----
 7 files changed, 36 insertions(+), 5 deletions(-)

Index: linux-2.6.29/fs/nfs/client.c
===================================================================
--- linux-2.6.29.orig/fs/nfs/client.c
+++ linux-2.6.29/fs/nfs/client.c
@@ -10,11 +10,11 @@
  */
 
 
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/nsproxy.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/stat.h>
@@ -564,10 +564,11 @@ static int nfs_create_rpc_client(struct 
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
 		.protocol	= clp->cl_proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
+		.net_ns		= current->nsproxy->net_ns,
 		.timeout	= timeparms,
 		.servername	= clp->cl_hostname,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
 		.authflavor	= flavor,
@@ -579,12 +580,14 @@ static int nfs_create_rpc_client(struct 
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
 
+	get_net(current->nsproxy->net_ns);
 	clnt = rpc_create(&args);
 	if (IS_ERR(clnt)) {
+		put_net(current->nsproxy->net_ns);
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
 				__func__, PTR_ERR(clnt));
 		return PTR_ERR(clnt);
 	}
 
Index: linux-2.6.29/include/linux/net.h
===================================================================
--- linux-2.6.29.orig/include/linux/net.h
+++ linux-2.6.29/include/linux/net.h
@@ -210,10 +210,12 @@ extern int	     sock_register(const stru
 extern void	     sock_unregister(int family);
 extern int	     sock_create(int family, int type, int proto,
 				 struct socket **res);
 extern int	     sock_create_kern(int family, int type, int proto,
 				      struct socket **res);
+extern int	     net_sock_create_kern(struct net *net, int family, int type,
+					  int proto, struct socket **res);
 extern int	     sock_create_lite(int family, int type, int proto,
 				      struct socket **res); 
 extern void	     sock_release(struct socket *sock);
 extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
 				  size_t len);
Index: linux-2.6.29/include/linux/sunrpc/clnt.h
===================================================================
--- linux-2.6.29.orig/include/linux/sunrpc/clnt.h
+++ linux-2.6.29/include/linux/sunrpc/clnt.h
@@ -100,10 +100,11 @@ struct rpc_procinfo {
 struct rpc_create_args {
 	int			protocol;
 	struct sockaddr		*address;
 	size_t			addrsize;
 	struct sockaddr		*saddress;
+	struct net 		*net_ns;
 	const struct rpc_timeout *timeout;
 	char			*servername;
 	struct rpc_program	*program;
 	u32			prognumber;	/* overrides program->number */
 	u32			version;
Index: linux-2.6.29/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6.29.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6.29/include/linux/sunrpc/xprt.h
@@ -194,10 +194,11 @@ struct rpc_xprt {
 
 struct xprt_create {
 	int			ident;		/* XPRT_TRANSPORT identifier */
 	struct sockaddr *	srcaddr;	/* optional local address */
 	struct sockaddr *	dstaddr;	/* remote peer address */
+	struct net *		net_ns;		/* net namespace */
 	size_t			addrlen;
 };
 
 struct xprt_class {
 	struct list_head	list;
Index: linux-2.6.29/net/socket.c
===================================================================
--- linux-2.6.29.orig/net/socket.c
+++ linux-2.6.29/net/socket.c
@@ -1212,10 +1212,15 @@ int sock_create(int family, int type, in
 int sock_create_kern(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(&init_net, family, type, protocol, res, 1);
 }
 
+int net_sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
+{
+	return __sock_create(net, family, type, protocol, res, 1);
+}
+
 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 {
 	int retval;
 	struct socket *sock;
 	int flags;
Index: linux-2.6.29/net/sunrpc/clnt.c
===================================================================
--- linux-2.6.29.orig/net/sunrpc/clnt.c
+++ linux-2.6.29/net/sunrpc/clnt.c
@@ -263,10 +263,11 @@ struct rpc_clnt *rpc_create(struct rpc_c
 	struct rpc_clnt *clnt;
 	struct xprt_create xprtargs = {
 		.ident = args->protocol,
 		.srcaddr = args->saddress,
 		.dstaddr = args->address,
+		.net_ns  = args->net_ns,
 		.addrlen = args->addrsize,
 	};
 	char servername[48];
 
 	/*
Index: linux-2.6.29/net/sunrpc/xprtsock.c
===================================================================
--- linux-2.6.29.orig/net/sunrpc/xprtsock.c
+++ linux-2.6.29/net/sunrpc/xprtsock.c
@@ -234,10 +234,11 @@ struct sock_xprt {
 	 * Connection of transports
 	 */
 	struct delayed_work	connect_worker;
 	struct sockaddr_storage	addr;
 	unsigned short		port;
+	struct net		*net_ns;
 
 	/*
 	 * UDP socket buffer size parameters
 	 */
 	size_t			rcvsize,
@@ -819,10 +820,11 @@ static void xs_destroy(struct rpc_xprt *
 	cancel_rearming_delayed_work(&transport->connect_worker);
 
 	xs_close(xprt);
 	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);
+ 	put_net(transport->net_ns);
 	kfree(xprt);
 	module_put(THIS_MODULE);
 }
 
 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1537,11 +1539,13 @@ static void xs_udp_connect_worker4(struc
 		goto out;
 
 	/* Start by resetting any existing state */
 	xs_close(xprt);
 
-	if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ 	err = net_sock_create_kern(transport->net_ns, PF_INET, SOCK_DGRAM,
+ 				   IPPROTO_UDP, &sock);
+	if (err < 0) {
 		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
 		goto out;
 	}
 	xs_reclassify_socket4(sock);
 
@@ -1578,11 +1582,13 @@ static void xs_udp_connect_worker6(struc
 		goto out;
 
 	/* Start by resetting any existing state */
 	xs_close(xprt);
 
-	if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ 	err = net_sock_create_kern(transport->net_ns, PF_INET6, SOCK_DGRAM,
+ 				   IPPROTO_UDP, &sock);
+	if (err < 0) {
 		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
 		goto out;
 	}
 	xs_reclassify_socket6(sock);
 
@@ -1684,11 +1690,13 @@ static void xs_tcp_connect_worker4(struc
 	if (xprt->shutdown)
 		goto out;
 
 	if (!sock) {
 		/* start from scratch */
-		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+		err = net_sock_create_kern(transport->net_ns, PF_INET,
+					   SOCK_STREAM, IPPROTO_TCP, &sock);
+		if (err < 0) {
 			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
 			goto out;
 		}
 		xs_reclassify_socket4(sock);
 
@@ -1744,11 +1752,13 @@ static void xs_tcp_connect_worker6(struc
 	if (xprt->shutdown)
 		goto out;
 
 	if (!sock) {
 		/* start from scratch */
-		if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+		err = net_sock_create_kern(transport->net_ns, PF_INET6,
+					   SOCK_STREAM, IPPROTO_TCP, &sock);
+		if (err < 0) {
 			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
 			goto out;
 		}
 		xs_reclassify_socket6(sock);
 
@@ -1988,10 +1998,14 @@ static struct rpc_xprt *xs_setup_udp(str
 
 	xprt->ops = &xs_udp_ops;
 
 	xprt->timeout = &xs_udp_default_timeout;
 
+	if (args->net_ns)
+		transport->net_ns = args->net_ns;
+	else
+		transport->net_ns = &init_net;
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 
@@ -2055,10 +2069,14 @@ static struct rpc_xprt *xs_setup_tcp(str
 	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
 	xprt->ops = &xs_tcp_ops;
 	xprt->timeout = &xs_tcp_default_timeout;
 
+	if (args->net_ns)
+		transport->net_ns = args->net_ns;
+	else
+		transport->net_ns = &init_net;
 	switch (addr->sa_family) {
 	case AF_INET:
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			xprt_set_bound(xprt);
 

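A minimal sketch of how a transport would use the new helper once the patch
is applied (the enclosing function and error unwinding are elided; the names
mirror the xprtsock.c hunks above):

	struct socket *sock;
	int err;

	/* Open the socket in the namespace captured at mount time
	 * rather than in init_net. */
	err = net_sock_create_kern(transport->net_ns, PF_INET, SOCK_DGRAM,
				   IPPROTO_UDP, &sock);
	if (err < 0)
		dprintk("RPC:       can't create UDP transport socket (%d).\n",
			-err);

	/* The transport owns the get_net() reference taken at mount
	 * time; xs_destroy() drops it with put_net(). */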

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
From: Chuck Lever @ 2009-05-12 22:18 UTC
  To: Matt Helsley; +Cc: Containers, linux-nfs, Eric Biederman

Hi Matt-

On May 12, 2009, at 5:51 PM, Matt Helsley wrote:
> Sun RPC currently opens sockets from the initial network namespace,
> making it impossible to restrict which NFS servers a container may
> interact with.
>
> For example, the NFS server at 10.0.0.3 reachable from the initial
> namespace will always be used even if an entirely different server
> with the address 10.0.0.3 is reachable from a container's network
> namespace. Hence network namespaces cannot be used to restrict the
> network access of a container as long as the RPC code opens sockets
> in the initial network namespace. This is in stark contrast to
> protocols such as HTTP, where sockets are created in the proper
> namespace because kernel threads are not used to open sockets for
> client network I/O.
>
> We may plausibly end up with such namespaces in two ways:
>
> I) The administrator may mount 10.0.0.3:/export_foo from init's
> container, clone the mount namespace, and unmount from the original
> mount namespace.
>
> II) The administrator may start a task which clones the mount
> namespace before mounting 10.0.0.3:/export_foo.

> Proposed Solution:
>
> The network namespace of the task that performed the mount best
> defines which server the "administrator", whether in a container or
> not, expects to work with. When the mount is done inside a container,
> that container's network namespace is the one to use. When the mount
> is done prior to creating the container, the original namespace is
> the one that should be used.
>
> This allows system administrators to isolate network traffic
> generated by NFS clients by mounting after creating a container. If
> partial isolation is desired, the administrator may instead mount
> before creating a container with a new network namespace. In either
> case the RPC packets would originate from a consistent namespace.
>
> One way to ensure consistent namespace usage is to hold a reference
> to the original network namespace for as long as the mount exists.
> This naturally suggests storing the network namespace reference in
> the NFS superblock. However, it may be better to store it with the
> RPC transport itself, since the transport is directly responsible
> for (re)opening the sockets.
>
> This patch adds a reference to the network namespace to the RPC
> transport. When the NFS export is mounted, the network namespace of
> the current task determines which namespace to reference. That
> reference is stored in the RPC transport and used whenever a new
> socket must be opened.

Some (perhaps random) thoughts.

NFS clients can also receive traffic. A server can post an NLM_GRANT
request to a client to tell it that a lock the client was waiting for has
now been granted. An NFSv4 server can post a delegation callback request
to a client. Servers can also send SM_NOTIFY requests to indicate they
have rebooted.

lockd needs to use the same network (and UTS) namespace as the NFS mount
point when handling locks for these mount points, as it sends requests to
a server on separate transports, and it sends a "caller_name" (today, a
UTS name; meant to be an FQDN) in NLM_LOCK requests that is designed to be
used by the server to call the client back. Some servers perform a DNS
lookup on this name; some merely swipe the source address of the incoming
request.

The client's lockd sends its caller_name to statd (in its user space) so
that statd can send this name to servers when the client reboots. Most
servers have a statd that performs a DNS lookup on this name to send an
SM_NOTIFY back to the client.

So, lockd's callback service, and the NFSv4 delegation service, both
started by an NFS mount on a client, will likely need to be sensitive to
the mount point's network and UTS namespace.

If we want to support NFSv2/v3 lock recovery when a container restarts,
to support NFSv4 delegation for files accessed in a container, and to
support asynchronous NLM_GRANT for files locked in a container, this is
probably the way it will have to be done.

lockd/statd are probably not ready at this point to support this kind of
thing because of the simple way they manage caller_name strings today. We
have text-based NFS mount options in the kernel now, so it might be
possible (or even easy) to have user space figure out the right
configuration and then pass some of this information down via mount
options. This would keep policy decisions in user space and reduce the
amount of heuristics needed in the kernel.
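
As a purely hypothetical sketch of that last idea (the "callername="
mount option below is invented for illustration; it does not exist):

	#include <sys/mount.h>

	int main(void)
	{
		/* User space works out the right caller_name for this
		 * container and hands it to the kernel as a text-based
		 * NFS mount option next to the usual addr= option. */
		return mount("10.0.0.3:/export_foo", "/mnt", "nfs", 0,
			     "addr=10.0.0.3,callername=client1.example.com");
	}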

So, yes, I think the RPC layer is going to have to be sensitive to  
network namespaces, but something has to be done about the upper  
layers too.

-- 
Chuck Lever
chuck.lever@oracle.com


* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
From: Trond Myklebust @ 2009-05-12 23:46 UTC
  To: Matt Helsley; +Cc: Containers, linux-nfs, Eric Biederman

On Tue, 2009-05-12 at 14:51 -0700, Matt Helsley wrote:
> Sun RPC currently opens sockets from the initial network namespace, making
> it impossible to restrict which NFS servers a container may interact with.
>
> For example, the NFS server at 10.0.0.3 reachable from the initial namespace
> will always be used even if an entirely different server with the address
> 10.0.0.3 is reachable from a container's network namespace. Hence network
> namespaces cannot be used to restrict the network access of a container as
> long as the RPC code opens sockets in the initial network namespace. This is
> in stark contrast to protocols such as HTTP, where sockets are created in
> the proper namespace because kernel threads are not used to open sockets
> for client network I/O.
>
> We may plausibly end up with such namespaces in two ways:
>
> I) The administrator may mount 10.0.0.3:/export_foo from init's
> container, clone the mount namespace, and unmount from the original
> mount namespace.
>
> II) The administrator may start a task which clones the mount namespace
> before mounting 10.0.0.3:/export_foo.
>
> Proposed Solution:
>
> The network namespace of the task that performed the mount best defines
> which server the "administrator", whether in a container or not, expects
> to work with. When the mount is done inside a container, that container's
> network namespace is the one to use. When the mount is done prior to
> creating the container, the original namespace is the one that should be
> used.
>
> This allows system administrators to isolate network traffic generated by
> NFS clients by mounting after creating a container. If partial isolation
> is desired, the administrator may instead mount before creating a container
> with a new network namespace. In either case the RPC packets would
> originate from a consistent namespace.
>
> One way to ensure consistent namespace usage is to hold a reference to the
> original network namespace for as long as the mount exists. This naturally
> suggests storing the network namespace reference in the NFS superblock.
> However, it may be better to store it with the RPC transport itself, since
> the transport is directly responsible for (re)opening the sockets.
>
> This patch adds a reference to the network namespace to the RPC transport.
> When the NFS export is mounted, the network namespace of the current task
> determines which namespace to reference. That reference is stored in the
> RPC transport and used whenever a new socket must be opened.

Ewwwwwwww

You ignore the fact that NFS super blocks that point to the same
filesystem are shared (including between containers). We don't want to
have separate page caches in cases where the filesystems are the same;
that causes unnecessary cache consistency problems. There is sharing at
other levels too. All NFSv4 super blocks that share a server IP address,
will also share a common lease. Ditto when it comes to NFSv2 and NFSv3
clients, and lock monitoring state.

You ignore the fact that NFS often depends on a whole slew of other RPC
services. Kernel services like NLM (a.k.a lockd), the portmap/rpcbind
client, and user space utilities like statd and the portmap/rpcbind
server. Are we supposed to add socket namespace crap to all those apis
too?

What happens to services like rpc.gssd, of which there is only one user
space instance, and which use the ip address of the server (as supplied
by the kernel) to figure out who they are talking to?

Finally, what happens if someone decides to set up a private socket
namespace, using CLONE_NEWNET, without also using CLONE_NEWNS to create
a private mount namespace? Would anyone have even the remotest chance in
hell of figuring out what filesystem is mounted where in the ensuing
chaos?

Trond


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
  2009-05-12 21:51 ` Matt Helsley
@ 2009-05-13  0:01     ` Eric W. Biederman
  -1 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2009-05-13  0:01 UTC (permalink / raw)
  To: Matt Helsley; +Cc: Containers, linux-nfs-u79uwXL29TY76Z2rM5mHXA

Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> writes:

> Sun RPC currently opens sockets from the initial network namespace making it
> impossible to restrict which NFS servers a container may interact with.
>
> For example, the NFS server at 10.0.0.3 reachable from the initial namespace
> will always be used even if an entirely different server with the address
> 10.0.0.3 is reachable from a container's network namespace. Hence network
> namespaces cannot be used to restrict the network access of a container as long
> as the RPC code opens sockets using the initial network namespace. This is
> in stark contrast to other protocols like HTTP where the sockets are created in
> their proper namespaces because kernel threads are not used to open sockets for
> client network IO.
>
> We may plausibly end up with namespaces created by:
> I) The administrator may mount 10.0.0.3:/export_foo from init's
> container, clone the mount namespace, and unmount from the original
> mount namespace.
>
> II) The administrator may start a task which clones the mount namespace
> before mounting 10.0.0.3:/export_foo.
>
> Proposed Solution:
>
> The network namespace of the task that did the mount best defines which server
> the "administrator", whether in a container or not, expects to work with.
> When the mount is done inside a container then that is the network namespace 
> to use. When the mount is done prior to creating the container then that's the 
> namespace that should be used.
>
> This allows system administrators to isolate network traffic generated by NFS
> clients by mounting after creating a container. If partial isolation is desired
> then the administrator may mount before creating a container with a new network
> namespace. In each case the RPC packets would originate from a consistent
> namespace.
>
> One way to ensure consistent namespace usage would be to hold a reference to
> the original network namespace as long as the mount exists. This naturally 
> suggests storing the network namespace reference in the NFS superblock. 
> However, it may be better to store it with the RPC transport itself since
> it is directly responsible for (re)opening the sockets.
>
> This patch adds a reference to the network namespace to the RPC
> transport. When the NFS export is mounted the network namespace of
> the current task establishes which namespace to reference. That
> reference is stored in the RPC transport and used to open sockets
> whenever a new socket is required.

Matt.  This may be the basis of something and the problem is real.
However it is clear you have missed a lot of details.

So could you first address this problem in nfs_get_sb by 
denying the mount if we are not in the initial network namespace.

I.e.

if (current->nsproxy->net_ns != &init_net)
	return -EINVAL;

That should be a lot simpler to get right and at least give reliable
and predictable semantics.


Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  0:01     ` Eric W. Biederman
  0 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2009-05-13  0:01 UTC (permalink / raw)
  To: Matt Helsley; +Cc: Containers, linux-nfs

Matt Helsley <matthltc@us.ibm.com> writes:

> Sun RPC currently opens sockets from the initial network namespace making it
> impossible to restrict which NFS servers a container may interact with.
>
> For example, the NFS server at 10.0.0.3 reachable from the initial namespace
> will always be used even if an entirely different server with the address
> 10.0.0.3 is reachable from a container's network namespace. Hence network
> namespaces cannot be used to restrict the network access of a container as long
> as the RPC code opens sockets using the initial network namespace. This is
> in stark contrast to other protocols like HTTP where the sockets are created in
> their proper namespaces because kernel threads are not used to open sockets for
> client network IO.
>
> We may plausibly end up with namespaces created by:
> I) The administrator may mount 10.0.0.3:/export_foo from init's
> container, clone the mount namespace, and unmount from the original
> mount namespace.
>
> II) The administrator may start a task which clones the mount namespace
> before mounting 10.0.0.3:/export_foo.
>
> Proposed Solution:
>
> The network namespace of the task that did the mount best defines which server
> the "administrator", whether in a container or not, expects to work with.
> When the mount is done inside a container then that is the network namespace 
> to use. When the mount is done prior to creating the container then that's the 
> namespace that should be used.
>
> This allows system administrators to isolate network traffic generated by NFS
> clients by mounting after creating a container. If partial isolation is desired
> then the administrator may mount before creating a container with a new network
> namespace. In each case the RPC packets would originate from a consistent
> namespace.
>
> One way to ensure consistent namespace usage would be to hold a reference to
> the original network namespace as long as the mount exists. This naturally 
> suggests storing the network namespace reference in the NFS superblock. 
> However, it may be better to store it with the RPC transport itself since
> it is directly responsible for (re)opening the sockets.
>
> This patch adds a reference to the network namespace to the RPC
> transport. When the NFS export is mounted the network namespace of
> the current task establishes which namespace to reference. That
> reference is stored in the RPC transport and used to open sockets
> whenever a new socket is required.

Matt, this may be the basis of something, and the problem is real.
However, it is clear you have missed a lot of details.

So could you first address this problem in nfs_get_sb by
denying the mount if we are not in the initial network namespace?

I.e.

if (current->nsproxy->net_ns != &init_net)
	return -EINVAL;

That should be a lot simpler to get right and at least give reliable
and predictable semantics.


Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread
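
A sketch of where Eric's suggested guard might sit, at the top of the
NFS get_sb path in fs/nfs/super.c; the surrounding signature assumes
2.6.29-era code and is illustrative, not taken from any patch:

/* fs/nfs/super.c: refuse NFS mounts outside the initial net namespace */
#include <linux/nsproxy.h>
#include <linux/sched.h>
#include <net/net_namespace.h>

static int nfs_get_sb(struct file_system_type *fs_type, int flags,
		      const char *dev_name, void *raw_data,
		      struct vfsmount *mnt)
{
	/* Deny the mount until the RPC layer handles network
	 * namespaces correctly; this gives predictable semantics. */
	if (current->nsproxy->net_ns != &init_net)
		return -EINVAL;

	/* ... existing mount processing continues unchanged ... */
	return 0;
}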

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  0:04         ` Eric W. Biederman
  0 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2009-05-13  0:04 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Matt Helsley, Containers, linux-nfs

Trond Myklebust <trond.myklebust@fys.uio.no> writes:

> Finally, what happens if someone decides to set up a private socket
> namespace, using CLONE_NEWNET, without also using CLONE_NEWNS to create
> a private mount namespace? Would anyone have even the remotest chance in
> hell of figuring out what filesystem is mounted where in the ensuing
> chaos?

Good question.  Multiple NFS servers with the same IP address reachable
from the same machine sounds about as nasty a pickle as it gets.

The only way I can even imagine a setup like that is someone connecting
to a VPN, so they are behind more than one NAT gateway.

Bleh, NAT sucks.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  0:13             ` Trond Myklebust
  0 siblings, 0 replies; 18+ messages in thread
From: Trond Myklebust @ 2009-05-13  0:13 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Matt Helsley, Containers, linux-nfs

On Tue, 2009-05-12 at 17:04 -0700, Eric W. Biederman wrote:
> Trond Myklebust <trond.myklebust@fys.uio.no> writes:
> 
> > Finally, what happens if someone decides to set up a private socket
> > namespace, using CLONE_NEWNET, without also using CLONE_NEWNS to create
> > a private mount namespace? Would anyone have even the remotest chance in
> > hell of figuring out what filesystem is mounted where in the ensuing
> > chaos?
> 
> Good question.  Multiple NFS servers with the same IP address reachable
> from the same machine sounds about as nasty a pickle as it gets.
> 
> The only way I can even imagine a setup like that is someone connecting
> to a VPN, so they are behind more than one NAT gateway.
> 
> Bleh, NAT sucks.

It is doable, though, and it will affect more than just NFS. Pretty much
all networked filesystems are affected.

It begs the question: is there ever any possible justification for
allowing CLONE_NEWNET without implying CLONE_NEWNS?

Trond


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  0:44                 ` Matt Helsley
  0 siblings, 0 replies; 18+ messages in thread
From: Matt Helsley @ 2009-05-13  0:44 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Eric W. Biederman, Matt Helsley, Containers, linux-nfs

On Tue, May 12, 2009 at 08:13:24PM -0400, Trond Myklebust wrote:
> On Tue, 2009-05-12 at 17:04 -0700, Eric W. Biederman wrote:
> > Trond Myklebust <trond.myklebust@fys.uio.no> writes:
> > 
> > > Finally, what happens if someone decides to set up a private socket
> > > namespace, using CLONE_NEWNET, without also using CLONE_NEWNS to create
> > > a private mount namespace? Would anyone have even the remotest chance in
> > > hell of figuring out what filesystem is mounted where in the ensuing
> > > chaos?
> > 
> > Good question.  Multiple NFS servers with the same IP address reachable
> > from the same machine sounds about as nasty a pickle as it gets.
> > 
> > The only way I can even imagine a setup like that is someone connecting
> > to a VPN, so they are behind more than one NAT gateway.
> > 
> > Bleh, NAT sucks.
> 
> It is doable, though, and it will affect more than just NFS. Pretty much
> all networked filesystems are affected.
> 
> It begs the question: is there ever any possible justification for
> allowing CLONE_NEWNET without implying CLONE_NEWNS?

There are so many filesystem-based kernel APIs that this is a pervasive
problem IMHO -- not just with CLONE_NEWNET. However, even if we required
CLONE_NEWNET|CLONE_NEWNS, network namespaces would still present a
problem for network filesystems in general.

-Matt

^ permalink raw reply	[flat|nested] 18+ messages in thread
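
If CLONE_NEWNET did imply CLONE_NEWNS, container setup would look
roughly like the following clone(2) call; this is a sketch of the flag
combination under discussion, not an interface proposed in the thread:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>

static int child(void *arg)
{
	/* Both the network and the mount namespace are private here,
	 * so any NFS mount this task makes stays consistent with the
	 * network it can actually reach. */
	return 0;
}

int main(void)
{
	static char stack[64 * 1024];
	pid_t pid;

	/* The stack grows down on most architectures, so pass the top. */
	pid = clone(child, stack + sizeof(stack),
		    CLONE_NEWNET | CLONE_NEWNS | SIGCHLD, NULL);
	if (pid == -1) {
		perror("clone");
		exit(EXIT_FAILURE);
	}
	waitpid(pid, NULL, 0);
	return 0;
}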

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  1:05         ` Matt Helsley
  0 siblings, 0 replies; 18+ messages in thread
From: Matt Helsley @ 2009-05-13  1:05 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Matt Helsley, Containers, linux-nfs

On Tue, May 12, 2009 at 05:01:58PM -0700, Eric W. Biederman wrote:
> Matt Helsley <matthltc@us.ibm.com> writes:
> 
> > Sun RPC currently opens sockets from the initial network namespace making it
> > impossible to restrict which NFS servers a container may interact with.
> >
> > For example, the NFS server at 10.0.0.3 reachable from the initial namespace
> > will always be used even if an entirely different server with the address
> > 10.0.0.3 is reachable from a container's network namespace. Hence network
> > namespaces cannot be used to restrict the network access of a container as long
> > as the RPC code opens sockets using the initial network namespace. This is
> > in stark contrast to other protocols like HTTP where the sockets are created in
> > their proper namespaces because kernel threads are not used to open sockets for
> > client network IO.
> >
> > We may plausibly end up with namespaces created by:
> > I) The administrator may mount 10.0.0.3:/export_foo from init's
> > container, clone the mount namespace, and unmount from the original
> > mount namespace.
> >
> > II) The administrator may start a task which clones the mount namespace
> > before mounting 10.0.0.3:/export_foo.
> >
> > Proposed Solution:
> >
> > The network namespace of the task that did the mount best defines which server
> > the "administrator", whether in a container or not, expects to work with.
> > When the mount is done inside a container then that is the network namespace 
> > to use. When the mount is done prior to creating the container then that's the 
> > namespace that should be used.
> >
> > This allows system administrators to isolate network traffic generated by NFS
> > clients by mounting after creating a container. If partial isolation is desired
> > then the administrator may mount before creating a container with a new network
> > namespace. In each case the RPC packets would originate from a consistent
> > namespace.
> >
> > One way to ensure consistent namespace usage would be to hold a reference to
> > the original network namespace as long as the mount exists. This naturally 
> > suggests storing the network namespace reference in the NFS superblock. 
> > However, it may be better to store it with the RPC transport itself since
> > it is directly responsible for (re)opening the sockets.
> >
> > This patch adds a reference to the network namespace to the RPC
> > transport. When the NFS export is mounted the network namespace of
> > the current task establishes which namespace to reference. That
> > reference is stored in the RPC transport and used to open sockets
> > whenever a new socket is required.
> 
> Matt, this may be the basis of something, and the problem is real.
> However, it is clear you have missed a lot of details.

Well, crap. I did not ignore the other RPC services I noticed when I
tried reading the NFS/RPC code, but based on the responses from Chuck,
you, and Trond, I clearly fucked up when I thought I had properly
understood how the RPC code works with the services that support NFS.

I figured that since RPC was the core of these services, it would be a
good place to start trying to address the problem. It looked like the
RPC transport was a good place to deal with all of these services, since
it's responsible for (re)opening the sockets needed to perform RPC IO.
But apparently the transport is not shared the way I thought it was. :/

> So could you first address this problem in nfs_get_sb by
> denying the mount if we are not in the initial network namespace?
> 
> I.e.
> 
> if (current->nsproxy->net_ns != &init_net)
> 	return -EINVAL;
> 
> That should be a lot simpler to get right and at least give reliable
> and predictable semantics.

Yes, that seems like a reasonable preventive measure for now.

	-Matt


^ permalink raw reply	[flat|nested] 18+ messages in thread
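
For reference, the shape Matt's description outlines (capturing the
mount-time namespace in the RPC transport and using it whenever a
socket is reopened) would look roughly like this; the field and helper
names here are assumptions, simplified rather than copied from the
patch:

/* include/linux/sunrpc/xprt.h: hold the mount-time namespace */
struct rpc_xprt {
	/* ... existing fields ... */
	struct net *xprt_net;	/* reference taken with get_net() at mount */
};

/* net/sunrpc/xprtsock.c: open sockets in that namespace */
static int xs_create_sock(struct rpc_xprt *xprt, int family, int type,
			  int protocol, struct socket **sockp)
{
	/* __sock_create() takes an explicit struct net; plain
	 * sock_create_kern() would hardcode init_net here. */
	return __sock_create(xprt->xprt_net, family, type, protocol,
			     sockp, 1);
}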

* Re: [RFC][PATCH] Improve NFS use of network and mount namespaces
@ 2009-05-13  1:11                 ` Eric W. Biederman
  0 siblings, 0 replies; 18+ messages in thread
From: Eric W. Biederman @ 2009-05-13  1:11 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Matt Helsley, Containers, linux-nfs

Trond Myklebust <trond.myklebust@fys.uio.no> writes:

> On Tue, 2009-05-12 at 17:04 -0700, Eric W. Biederman wrote:
>> Trond Myklebust <trond.myklebust@fys.uio.no> writes:
>> 
>> > Finally, what happens if someone decides to set up a private socket
>> > namespace, using CLONE_NEWNET, without also using CLONE_NEWNS to create
>> > a private mount namespace? Would anyone have even the remotest chance in
>> > hell of figuring out what filesystem is mounted where in the ensuing
>> > chaos?
>> 
>> Good question.  Multiple NFS servers with the same ip address reachable
>> from the same machine sounds about as nasty pickle as it gets.
>> 
>> The only way I can even imagine a setup like that is someone connecting
>> to a vpn.  So they are behind more than one NAT gateway.
>> 
>> Bleh NAT sucks.
>
> It is doable, though, and it will affect more than just NFS. Pretty much
> all networked filesystems are affected.

Good point.  That was an oversight when I did the initial round of
patches denying the unsupported cases outside the initial network
namespace.

> It begs the question: is there ever any possible justification for
> allowing CLONE_NEWNET without implying CLONE_NEWNS?

Superblocks and the like are independent of the mount namespace, so I
don't even see CLONE_NEWNS helping except for looking in
/proc/mounts.

If network filesystems have a path-based identity, a.k.a. an IP
address, this is a problem. If there is some other kind of identity,
like a UUID, this problem might not even matter.

As for the original question: we have test setups at work where we have
tests running in different network namespaces, but they don't conflict
in the filesystem, so CLONE_NEWNS would be redundant as well as
unhelpful.

Eric

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-05-13  1:11 UTC | newest]

Thread overview: 9 messages
2009-05-12 21:51 [RFC][PATCH] Improve NFS use of network and mount namespaces Matt Helsley
2009-05-12 22:18 ` Chuck Lever
2009-05-12 23:46 ` Trond Myklebust
2009-05-13  0:04   ` Eric W. Biederman
2009-05-13  0:13     ` Trond Myklebust
2009-05-13  0:44       ` Matt Helsley
2009-05-13  1:11       ` Eric W. Biederman
2009-05-13  0:01 ` Eric W. Biederman
2009-05-13  1:05   ` Matt Helsley
