All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v3] net: add uevent socket member
@ 2018-03-17 11:08 Christian Brauner
  2018-03-17 11:08 ` [PATCH 2/2 v3] netns: send uevent messages Christian Brauner
  2018-03-19 11:53 ` [PATCH 1/2 v3] net: add uevent socket member Kirill Tkhai
  0 siblings, 2 replies; 4+ messages in thread
From: Christian Brauner @ 2018-03-17 11:08 UTC (permalink / raw)
  To: ebiederm, gregkh, netdev, linux-kernel
  Cc: serge, avagin, ktkhai, Christian Brauner

This commit adds struct uevent_sock to struct net. Since struct uevent_sock
records the position of the uevent socket in the uevent socket list we can
trivially remove it from the uevent socket list during cleanup. This speeds
up the old removal codepath.
Note, list_del() will hit __list_del_entry_valid() in its call chain which
will validate that the element is a member of the list. If it isn't it will
take care that the list is not modified.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
Changelog v2->v3:
* patch added
  This patch was split out of the follow up patch
  Subject: [PATCH 2/2 v3] netns: send uevent messages

Changelog v1->v2:
* patch not present

Changelog v0->v1:
* patch not present
---
 include/net/net_namespace.h |  4 +++-
 lib/kobject_uevent.c        | 19 +++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f306b2aa15a4..abd7d91bffac 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -40,7 +40,7 @@ struct net_device;
 struct sock;
 struct ctl_table_header;
 struct net_generic;
-struct sock;
+struct uevent_sock;
 struct netns_ipvs;
 
 
@@ -79,6 +79,8 @@ struct net {
 	struct sock 		*rtnl;			/* rtnetlink socket */
 	struct sock		*genl_sock;
 
+	struct uevent_sock	*uevent_sock;		/* uevent socket */
+
 	struct list_head 	dev_base_head;
 	struct hlist_head 	*dev_name_head;
 	struct hlist_head	*dev_index_head;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..cbdc60542cab 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -32,11 +32,13 @@ u64 uevent_seqnum;
 #ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 #endif
-#ifdef CONFIG_NET
+
 struct uevent_sock {
 	struct list_head list;
 	struct sock *sk;
 };
+
+#ifdef CONFIG_NET
 static LIST_HEAD(uevent_sock_list);
 #endif
 
@@ -621,6 +623,9 @@ static int uevent_net_init(struct net *net)
 		kfree(ue_sk);
 		return -ENODEV;
 	}
+
+	net->uevent_sock = ue_sk;
+
 	mutex_lock(&uevent_sock_mutex);
 	list_add_tail(&ue_sk->list, &uevent_sock_list);
 	mutex_unlock(&uevent_sock_mutex);
@@ -629,22 +634,16 @@ static int uevent_net_init(struct net *net)
 
 static void uevent_net_exit(struct net *net)
 {
-	struct uevent_sock *ue_sk;
+	struct uevent_sock *ue_sk = net->uevent_sock;
 
 	mutex_lock(&uevent_sock_mutex);
-	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
-		if (sock_net(ue_sk->sk) == net)
-			goto found;
-	}
-	mutex_unlock(&uevent_sock_mutex);
-	return;
-
-found:
 	list_del(&ue_sk->list);
 	mutex_unlock(&uevent_sock_mutex);
 
 	netlink_kernel_release(ue_sk->sk);
 	kfree(ue_sk);
+
+	return;
 }
 
 static struct pernet_operations uevent_net_ops = {
-- 
2.15.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2 v3] netns: send uevent messages
  2018-03-17 11:08 [PATCH 1/2 v3] net: add uevent socket member Christian Brauner
@ 2018-03-17 11:08 ` Christian Brauner
  2018-03-19 11:53 ` [PATCH 1/2 v3] net: add uevent socket member Kirill Tkhai
  1 sibling, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2018-03-17 11:08 UTC (permalink / raw)
  To: ebiederm, gregkh, netdev, linux-kernel
  Cc: serge, avagin, ktkhai, Christian Brauner

This patch adds a receive method to NETLINK_KOBJECT_UEVENT netlink sockets
to allow sending uevent messages into the network namespace the socket
belongs to.

Currently non-initial network namespaces are already isolated and don't
receive uevents. There are a number of cases where it is beneficial for a
sufficiently privileged userspace process to send a uevent into a network
namespace.

One such use case would be debugging and fuzzing of a piece of software
which listens and reacts to uevents. By running a copy of that software
inside a network namespace, specific uevents could then be presented to it.
More concretely, this would allow for easy testing of udevd/ueventd.

This will also allow some piece of software to run components inside a
separate network namespace and then effectively filter what that software
can receive. Some examples of software that directly listens to uevents
and that we have in the past attempted to run inside a network namespace
are rbd (CEPH client) or the X server.

Implementation:
The implementation has been kept as simple as possible from the kernel's
perspective. Specifically, a simple input method uevent_net_rcv() is added
to NETLINK_KOBJECT_UEVENT sockets which completely reuses existing
af_netlink infrastructure and neither adds an additional netlink family
nor requires any user-visible changes.

For example, by using netlink_rcv_skb() we can make use of existing netlink
infrastructure to report back informative error messages to userspace.

Furthermore, this implementation does not introduce any overhead for
existing uevent generating codepaths. struct net got a new uevent
socket member that records the uevent socket associated with that network
namespace including its position in the uevent socket list. Since we record
the uevent socket for each network namespace in struct net we don't have to
walk the whole uevent socket list. Instead we can directly retrieve the
relevant uevent socket and send the message. At exit time we can now also
trivially remove the uevent socket from the uevent socket list. This keeps
the codepath very performant without introducing needless overhead and even
makes older codepaths faster.

Uevent sequence numbers are kept global. When a uevent message is sent to
another network namespace the implementation will simply increment the
global uevent sequence number and append it to the received uevent. This
has the advantage that the kernel will never need to parse the received
uevent message to replace any existing uevent sequence numbers. Instead it
is up to the userspace process to remove any existing uevent sequence
numbers in case the uevent message to be sent contains any.

Security:
In order for a caller to send uevent messages to a target network namespace
the caller must have CAP_SYS_ADMIN in the owning user namespace of the
target network namespace. Additionally, any received uevent message is
verified to not exceed size UEVENT_BUFFER_SIZE. This includes the space
needed to append the uevent sequence number.

Testing:
This patch has been tested and verified to work with the following udev
implementations:
1. CentOS 6 with udevd version 147
2. Debian Sid with systemd-udevd version 237
3. Android 7.1.1 with ueventd

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
Changelog v2->v3:
* Use reverse christmas tree ordering for local variables.

Changelog v1->v2:
* Add the whole struct uevent_sock to struct net not just the socket
  member. Since struct uevent_sock records the position of the uevent
  socket in the uevent socket list we can trivially remove it from the
  uevent socket list during cleanup. This speeds up the old removal
  codepath. list_del() will hit __list_del_entry_valid() in its call chain
  which will validate that the element is a member of the list. If it isn't
  it will take care that the list is not modified.

Changelog v0->v1:
* Hold mutex_lock() until uevent is sent to preserve uevent message
  ordering. See udev and commit for reference:

  commit 7b60a18da393ed70db043a777fd9e6d5363077c4
  Author: Andrew Vagin <avagin@openvz.org>
  Date:   Wed Mar 7 14:49:56 2012 +0400

      uevent: send events in correct order according to seqnum (v3)

      The queue handling in the udev daemon assumes that the events are
      ordered.
---
 lib/kobject_uevent.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index cbdc60542cab..7631513f856e 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 
 
@@ -604,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+				struct netlink_ext_ack *extack)
+{
+	/* u64 to chars: 2^64 - 1 = 21 chars */
+	char buf[sizeof("SEQNUM=") + 21];
+	struct sk_buff *skbc;
+	int ret;
+
+	/* bump and prepare sequence number */
+	ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -ENOMEM;
+	ret++;
+
+	/* verify message does not overflow */
+	if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+		NL_SET_ERR_MSG(extack, "uevent message too big");
+		return -EINVAL;
+	}
+
+	/* copy skb and extend to accommodate sequence number */
+	skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+	if (!skbc)
+		return -ENOMEM;
+
+	/* append sequence number */
+	skb_put_data(skbc, buf, ret);
+
+	/* remove msg header */
+	skb_pull(skbc, NLMSG_HDRLEN);
+
+	/* set portid 0 to inform userspace message comes from kernel */
+	NETLINK_CB(skbc).portid = 0;
+	NETLINK_CB(skbc).dst_group = 1;
+
+	ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+	/* ENOBUFS should be handled in userspace */
+	if (ret == -ENOBUFS || ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct net *net;
+	int ret;
+
+	if (!nlmsg_data(nlh))
+		return -EINVAL;
+
+	/*
+	 * Verify that we are allowed to send messages to the target
+	 * network namespace. The caller must have CAP_SYS_ADMIN in the
+	 * owning user namespace of the target network namespace.
+	 */
+	net = sock_net(NETLINK_CB(skb).sk);
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+		NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+		return -EPERM;
+	}
+
+	mutex_lock(&uevent_sock_mutex);
+	ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+	mutex_unlock(&uevent_sock_mutex);
+
+	return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+	netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
 static int uevent_net_init(struct net *net)
 {
 	struct uevent_sock *ue_sk;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= 1,
-		.flags	= NL_CFG_F_NONROOT_RECV,
+		.input = uevent_net_rcv,
+		.flags	= NL_CFG_F_NONROOT_RECV
 	};
 
 	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
-- 
2.15.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2 v3] net: add uevent socket member
  2018-03-17 11:08 [PATCH 1/2 v3] net: add uevent socket member Christian Brauner
  2018-03-17 11:08 ` [PATCH 2/2 v3] netns: send uevent messages Christian Brauner
@ 2018-03-19 11:53 ` Kirill Tkhai
  2018-03-19 11:59   ` Christian Brauner
  1 sibling, 1 reply; 4+ messages in thread
From: Kirill Tkhai @ 2018-03-19 11:53 UTC (permalink / raw)
  To: Christian Brauner, ebiederm, gregkh, netdev, linux-kernel; +Cc: serge, avagin

Thanks for doing this. One small comment below.

On 17.03.2018 14:08, Christian Brauner wrote:
> This commit adds struct uevent_sock to struct net. Since struct uevent_sock
> records the position of the uevent socket in the uevent socket list we can
> trivially remove it from the uevent socket list during cleanup. This speeds
> up the old removal codepath.
> Note, list_del() will hit __list_del_entry_valid() in its call chain which
> will validate that the element is a member of the list. If it isn't it will
> take care that the list is not modified.
> 
> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
> ---
> Changelog v2->v3:
> * patch added
>   This patch was split out of the follow up patch
>   Subject: [PATCH 2/2 v3] netns: send uevent messages
> 
> Changelog v1->v2:
> * patch not present
> 
> Changelog v0->v1:
> * patch not present
> ---
>  include/net/net_namespace.h |  4 +++-
>  lib/kobject_uevent.c        | 19 +++++++++----------
>  2 files changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index f306b2aa15a4..abd7d91bffac 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -40,7 +40,7 @@ struct net_device;
>  struct sock;
>  struct ctl_table_header;
>  struct net_generic;
> -struct sock;
> +struct uevent_sock;
>  struct netns_ipvs;
>  
>  
> @@ -79,6 +79,8 @@ struct net {
>  	struct sock 		*rtnl;			/* rtnetlink socket */
>  	struct sock		*genl_sock;
>  
> +	struct uevent_sock	*uevent_sock;		/* uevent socket */
> +
>  	struct list_head 	dev_base_head;
>  	struct hlist_head 	*dev_name_head;
>  	struct hlist_head	*dev_index_head;
> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
> index 9fe6ec8fda28..cbdc60542cab 100644
> --- a/lib/kobject_uevent.c
> +++ b/lib/kobject_uevent.c
> @@ -32,11 +32,13 @@ u64 uevent_seqnum;
>  #ifdef CONFIG_UEVENT_HELPER
>  char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
>  #endif
> -#ifdef CONFIG_NET
> +
>  struct uevent_sock {
>  	struct list_head list;
>  	struct sock *sk;
>  };
> +
> +#ifdef CONFIG_NET
>  static LIST_HEAD(uevent_sock_list);
>  #endif
>  
> @@ -621,6 +623,9 @@ static int uevent_net_init(struct net *net)
>  		kfree(ue_sk);
>  		return -ENODEV;
>  	}
> +
> +	net->uevent_sock = ue_sk;
> +
>  	mutex_lock(&uevent_sock_mutex);
>  	list_add_tail(&ue_sk->list, &uevent_sock_list);
>  	mutex_unlock(&uevent_sock_mutex);
> @@ -629,22 +634,16 @@ static int uevent_net_init(struct net *net)
>  
>  static void uevent_net_exit(struct net *net)
>  {
> -	struct uevent_sock *ue_sk;
> +	struct uevent_sock *ue_sk = net->uevent_sock;
>  
>  	mutex_lock(&uevent_sock_mutex);
> -	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
> -		if (sock_net(ue_sk->sk) == net)
> -			goto found;
> -	}
> -	mutex_unlock(&uevent_sock_mutex);
> -	return;
> -
> -found:
>  	list_del(&ue_sk->list);
>  	mutex_unlock(&uevent_sock_mutex);
>  
>  	netlink_kernel_release(ue_sk->sk);
>  	kfree(ue_sk);
> +
> +	return;

This is the end of the function. Isn't the return superfluous here?

>  }
>  
>  static struct pernet_operations uevent_net_ops = {
> 

Kirill

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2 v3] net: add uevent socket member
  2018-03-19 11:53 ` [PATCH 1/2 v3] net: add uevent socket member Kirill Tkhai
@ 2018-03-19 11:59   ` Christian Brauner
  0 siblings, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2018-03-19 11:59 UTC (permalink / raw)
  To: Kirill Tkhai
  Cc: Christian Brauner, ebiederm, gregkh, netdev, linux-kernel, serge, avagin

On Mon, Mar 19, 2018 at 02:53:09PM +0300, Kirill Tkhai wrote:
> Thanks for doing this. One small comment below.
> 
> On 17.03.2018 14:08, Christian Brauner wrote:
> > This commit adds struct uevent_sock to struct net. Since struct uevent_sock
> > records the position of the uevent socket in the uevent socket list we can
> > trivially remove it from the uevent socket list during cleanup. This speeds
> > up the old removal codepath.
> > Note, list_del() will hit __list_del_entry_valid() in its call chain which
> > will validate that the element is a member of the list. If it isn't it will
> > take care that the list is not modified.
> > 
> > Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
> > ---
> > Changelog v2->v3:
> > * patch added
> >   This patch was split out of the follow up patch
> >   Subject: [PATCH 2/2 v3] netns: send uevent messages
> > 
> > Changelog v1->v2:
> > * patch not present
> > 
> > Changelog v0->v1:
> > * patch not present
> > ---
> >  include/net/net_namespace.h |  4 +++-
> >  lib/kobject_uevent.c        | 19 +++++++++----------
> >  2 files changed, 12 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> > index f306b2aa15a4..abd7d91bffac 100644
> > --- a/include/net/net_namespace.h
> > +++ b/include/net/net_namespace.h
> > @@ -40,7 +40,7 @@ struct net_device;
> >  struct sock;
> >  struct ctl_table_header;
> >  struct net_generic;
> > -struct sock;
> > +struct uevent_sock;
> >  struct netns_ipvs;
> >  
> >  
> > @@ -79,6 +79,8 @@ struct net {
> >  	struct sock 		*rtnl;			/* rtnetlink socket */
> >  	struct sock		*genl_sock;
> >  
> > +	struct uevent_sock	*uevent_sock;		/* uevent socket */
> > +
> >  	struct list_head 	dev_base_head;
> >  	struct hlist_head 	*dev_name_head;
> >  	struct hlist_head	*dev_index_head;
> > diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
> > index 9fe6ec8fda28..cbdc60542cab 100644
> > --- a/lib/kobject_uevent.c
> > +++ b/lib/kobject_uevent.c
> > @@ -32,11 +32,13 @@ u64 uevent_seqnum;
> >  #ifdef CONFIG_UEVENT_HELPER
> >  char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
> >  #endif
> > -#ifdef CONFIG_NET
> > +
> >  struct uevent_sock {
> >  	struct list_head list;
> >  	struct sock *sk;
> >  };
> > +
> > +#ifdef CONFIG_NET
> >  static LIST_HEAD(uevent_sock_list);
> >  #endif
> >  
> > @@ -621,6 +623,9 @@ static int uevent_net_init(struct net *net)
> >  		kfree(ue_sk);
> >  		return -ENODEV;
> >  	}
> > +
> > +	net->uevent_sock = ue_sk;
> > +
> >  	mutex_lock(&uevent_sock_mutex);
> >  	list_add_tail(&ue_sk->list, &uevent_sock_list);
> >  	mutex_unlock(&uevent_sock_mutex);
> > @@ -629,22 +634,16 @@ static int uevent_net_init(struct net *net)
> >  
> >  static void uevent_net_exit(struct net *net)
> >  {
> > -	struct uevent_sock *ue_sk;
> > +	struct uevent_sock *ue_sk = net->uevent_sock;
> >  
> >  	mutex_lock(&uevent_sock_mutex);
> > -	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
> > -		if (sock_net(ue_sk->sk) == net)
> > -			goto found;
> > -	}
> > -	mutex_unlock(&uevent_sock_mutex);
> > -	return;
> > -
> > -found:
> >  	list_del(&ue_sk->list);
> >  	mutex_unlock(&uevent_sock_mutex);
> >  
> >  	netlink_kernel_release(ue_sk->sk);
> >  	kfree(ue_sk);
> > +
> > +	return;
> 
> This is the end of the function. Isn't the return superfluous here?

Yeah, I can remove it and resend it now.

Christian

> 
> >  }
> >  
> >  static struct pernet_operations uevent_net_ops = {
> > 
> 
> Kirill

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-03-19 11:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-17 11:08 [PATCH 1/2 v3] net: add uevent socket member Christian Brauner
2018-03-17 11:08 ` [PATCH 2/2 v3] netns: send uevent messages Christian Brauner
2018-03-19 11:53 ` [PATCH 1/2 v3] net: add uevent socket member Kirill Tkhai
2018-03-19 11:59   ` Christian Brauner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.