All of lore.kernel.org
 help / color / mirror / Atom feed
* [net-next PATCH v1] net: netprio_cgroup: rework update socket logic
@ 2012-07-20 20:39 John Fastabend
  2012-07-21  2:00 ` Neil Horman
  0 siblings, 1 reply; 5+ messages in thread
From: John Fastabend @ 2012-07-20 20:39 UTC (permalink / raw)
  To: davem, nhorman; +Cc: netdev, eric.dumazet, gaofeng, lizefan

Instead of updating the sk_cgrp_prioidx struct field on every send
this only updates the field when a task is moved via cgroup
infrastructure.

This allows sockets that may be used by a kernel worker thread
to be managed. For example in the iscsi case today a user can
put iscsid in a netprio cgroup and control traffic will be sent
with the correct sk_cgrp_prioidx value set but as soon as data
is sent the kernel worker thread issues a send and sk_cgrp_prioidx
is updated with the kernel worker thread's value, which is the
default case.

It seems more correct to only update the field when the user
explicitly sets it via control group infrastructure. This allows
the users to manage sockets that may be used with other threads.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/net.h          |    1 +
 include/net/netprio_cgroup.h |    4 ++-
 net/core/netprio_cgroup.c    |   53 ++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c              |    6 ++---
 net/socket.c                 |    5 ++--
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index dc95700..99276c3 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -248,6 +248,7 @@ extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
 				  size_t size, int flags);
 extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
+extern struct socket *sock_from_file(struct file *file, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
 
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index d58fdec..2719dec 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -35,7 +35,7 @@ struct cgroup_netprio_state {
 extern int net_prio_subsys_id;
 #endif
 
-extern void sock_update_netprioidx(struct sock *sk);
+extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task);
 
 #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
 
@@ -82,7 +82,7 @@ static inline u32 task_netprioidx(struct task_struct *p)
 #endif /* CONFIG_NETPRIO_CGROUP */
 
 #else
-#define sock_update_netprioidx(sk)
+#define sock_update_netprioidx(sk, task)
 #endif
 
 #endif  /* _NET_CLS_CGROUP_H */
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b2e9caa..63d15e8 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -25,6 +25,8 @@
 #include <net/sock.h>
 #include <net/netprio_cgroup.h>
 
+#include <linux/fdtable.h>
+
 #define PRIOIDX_SZ 128
 
 static unsigned long prioidx_map[PRIOIDX_SZ];
@@ -272,6 +274,56 @@ out_free_devname:
 	return ret;
 }
 
+void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct task_struct *p;
+	char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
+
+	if (!tmp) {
+		pr_warn("Unable to attach cgrp due to alloc failure!\n");
+		return;
+	}
+
+	cgroup_taskset_for_each(p, cgrp, tset) {
+		unsigned int fd;
+		struct fdtable *fdt;
+		struct files_struct *files;
+
+		task_lock(p);
+		files = p->files;
+		if (!files) {
+			task_unlock(p);
+			continue;
+		}
+
+		rcu_read_lock();
+		fdt = files_fdtable(files);
+		for (fd = 0; fd < fdt->max_fds; fd++) {
+			char *path;
+			struct file *file;
+			struct socket *sock;
+			unsigned long s;
+			int rv, err = 0;
+
+			file = fcheck_files(files, fd);
+			if (!file)
+				continue;
+
+			path = d_path(&file->f_path, tmp, PAGE_SIZE);
+			rv = sscanf(path, "socket:[%lu]", &s);
+			if (rv <= 0)
+				continue;
+
+			sock = sock_from_file(file, &err);
+			if (!err)
+				sock_update_netprioidx(sock->sk, p);
+		}
+		rcu_read_unlock();
+		task_unlock(p);
+	}
+	kfree(tmp);
+}
+
 static struct cftype ss_files[] = {
 	{
 		.name = "prioidx",
@@ -289,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = {
 	.name		= "net_prio",
 	.create		= cgrp_create,
 	.destroy	= cgrp_destroy,
+	.attach		= net_prio_attach,
 #ifdef CONFIG_NETPRIO_CGROUP
 	.subsys_id	= net_prio_subsys_id,
 #endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 24039ac..2676a88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_classid);
 
-void sock_update_netprioidx(struct sock *sk)
+void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 {
 	if (in_interrupt())
 		return;
 
-	sk->sk_cgrp_prioidx = task_netprioidx(current);
+	sk->sk_cgrp_prioidx = task_netprioidx(task);
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
@@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
-		sock_update_netprioidx(sk);
+		sock_update_netprioidx(sk, current);
 	}
 
 	return sk;
diff --git a/net/socket.c b/net/socket.c
index 0452dca..dfe5b66 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -398,7 +398,7 @@ int sock_map_fd(struct socket *sock, int flags)
 }
 EXPORT_SYMBOL(sock_map_fd);
 
-static struct socket *sock_from_file(struct file *file, int *err)
+struct socket *sock_from_file(struct file *file, int *err)
 {
 	if (file->f_op == &socket_file_ops)
 		return file->private_data;	/* set in sock_map_fd */
@@ -406,6 +406,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
 	*err = -ENOTSOCK;
 	return NULL;
 }
+EXPORT_SYMBOL(sock_from_file);
 
 /**
  *	sockfd_lookup - Go from a file number to its socket slot
@@ -554,8 +555,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
 
 	sock_update_classid(sock->sk);
 
-	sock_update_netprioidx(sock->sk);
-
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [net-next PATCH v1] net: netprio_cgroup: rework update socket logic
  2012-07-20 20:39 [net-next PATCH v1] net: netprio_cgroup: rework update socket logic John Fastabend
@ 2012-07-21  2:00 ` Neil Horman
  2012-07-21 17:02   ` John Fastabend
  0 siblings, 1 reply; 5+ messages in thread
From: Neil Horman @ 2012-07-21  2:00 UTC (permalink / raw)
  To: John Fastabend; +Cc: davem, netdev, eric.dumazet, gaofeng, lizefan

On Fri, Jul 20, 2012 at 01:39:25PM -0700, John Fastabend wrote:
> Instead of updating the sk_cgrp_prioidx struct field on every send
> this only updates the field when a task is moved via cgroup
> infrastructure.
> 
> This allows sockets that may be used by a kernel worker thread
> to be managed. For example in the iscsi case today a user can
> put iscsid in a netprio cgroup and control traffic will be sent
> with the correct sk_cgrp_prioidx value set but as soon as data
> is sent the kernel worker thread isssues a send and sk_cgrp_prioidx
> is updated with the kernel worker threads value which is the
> default case.
> 
> It seems more correct to only update the field when the user
> explicitly sets it via control group infrastructure. This allows
> the users to manage sockets that may be used with other threads.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
I like the idea, but IIRC last time we tried this I think it caused problems
with processes that shared sockets.  That is to say, if you have a parent and
child process that dup a socket descriptor, and put them in separate cgroups,
you get unpredictable results, as the socket gets assigned a priority based on
the last process that moved cgroups.

Neil

> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [net-next PATCH v1] net: netprio_cgroup: rework update socket logic
  2012-07-21  2:00 ` Neil Horman
@ 2012-07-21 17:02   ` John Fastabend
  2012-07-21 17:18     ` Neil Horman
  0 siblings, 1 reply; 5+ messages in thread
From: John Fastabend @ 2012-07-21 17:02 UTC (permalink / raw)
  To: Neil Horman; +Cc: davem, netdev, eric.dumazet, gaofeng, lizefan

On 7/20/2012 7:00 PM, Neil Horman wrote:
> On Fri, Jul 20, 2012 at 01:39:25PM -0700, John Fastabend wrote:
>> Instead of updating the sk_cgrp_prioidx struct field on every send
>> this only updates the field when a task is moved via cgroup
>> infrastructure.
>>
>> This allows sockets that may be used by a kernel worker thread
>> to be managed. For example in the iscsi case today a user can
>> put iscsid in a netprio cgroup and control traffic will be sent
>> with the correct sk_cgrp_prioidx value set but as soon as data
>> is sent the kernel worker thread isssues a send and sk_cgrp_prioidx
>> is updated with the kernel worker threads value which is the
>> default case.
>>
>> It seems more correct to only update the field when the user
>> explicitly sets it via control group infrastructure. This allows
>> the users to manage sockets that may be used with other threads.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> I like the idea, but IIRC last time we tried this I think it caused problems
> with processes that shared sockets.  That is to say, if you have a parent and
> child process that dup an socket descriptior, and put them in separate cgroups,
> you get unpredictable results, as the socket gets assigned a priority based on
> the last processed that moved cgroups.
>
> Neil
>

Shared sockets create strange behavior as things exist today. If a dup
of the socket fd is created, the private data is still shared, right? So
in this case the sk_cgrp_prioidx value is going to get updated by both
threads and then it is a race to see what it happens to be set to in
the xmit path.

With this patch at least the behavior is deterministic. Without it
I can create the above scenario but have no way to determine what the
skb priority will actually be set to.

.John

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [net-next PATCH v1] net: netprio_cgroup: rework update socket logic
  2012-07-21 17:02   ` John Fastabend
@ 2012-07-21 17:18     ` Neil Horman
  2012-07-22 19:44       ` David Miller
  0 siblings, 1 reply; 5+ messages in thread
From: Neil Horman @ 2012-07-21 17:18 UTC (permalink / raw)
  To: John Fastabend; +Cc: davem, netdev, eric.dumazet, gaofeng, lizefan

On Sat, Jul 21, 2012 at 10:02:03AM -0700, John Fastabend wrote:
> On 7/20/2012 7:00 PM, Neil Horman wrote:
> >On Fri, Jul 20, 2012 at 01:39:25PM -0700, John Fastabend wrote:
> >>Instead of updating the sk_cgrp_prioidx struct field on every send
> >>this only updates the field when a task is moved via cgroup
> >>infrastructure.
> >>
> >>This allows sockets that may be used by a kernel worker thread
> >>to be managed. For example in the iscsi case today a user can
> >>put iscsid in a netprio cgroup and control traffic will be sent
> >>with the correct sk_cgrp_prioidx value set but as soon as data
> >>is sent the kernel worker thread isssues a send and sk_cgrp_prioidx
> >>is updated with the kernel worker threads value which is the
> >>default case.
> >>
> >>It seems more correct to only update the field when the user
> >>explicitly sets it via control group infrastructure. This allows
> >>the users to manage sockets that may be used with other threads.
> >>
> >>Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> >I like the idea, but IIRC last time we tried this I think it caused problems
> >with processes that shared sockets.  That is to say, if you have a parent and
> >child process that dup an socket descriptior, and put them in separate cgroups,
> >you get unpredictable results, as the socket gets assigned a priority based on
> >the last processed that moved cgroups.
> >
> >Neil
> >
> 
> Shared sockets creates strange behavior as it exists today. If a dup
> of the socket fd is created the private data is still shared right. So
> in this case the sk_cgrp_prioidx value is going to get updated by both
> threads and then it is a race to see what it happens to be set to in
> the xmit path.
> 
> With this patch at least the behavior is deterministic. Without it
> I can create the above scenario but have no way to determine what the
> skb priority will actually be set to.
> 
> .John
> 
Ok, I can buy that.  Let's give this a try:

Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [net-next PATCH v1] net: netprio_cgroup: rework update socket logic
  2012-07-21 17:18     ` Neil Horman
@ 2012-07-22 19:44       ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2012-07-22 19:44 UTC (permalink / raw)
  To: nhorman; +Cc: john.r.fastabend, netdev, eric.dumazet, gaofeng, lizefan

From: Neil Horman <nhorman@tuxdriver.com>
Date: Sat, 21 Jul 2012 13:18:55 -0400

> On Sat, Jul 21, 2012 at 10:02:03AM -0700, John Fastabend wrote:
>> On 7/20/2012 7:00 PM, Neil Horman wrote:
>> >On Fri, Jul 20, 2012 at 01:39:25PM -0700, John Fastabend wrote:
>> >>Instead of updating the sk_cgrp_prioidx struct field on every send
>> >>this only updates the field when a task is moved via cgroup
>> >>infrastructure.
>> >>
>> >>This allows sockets that may be used by a kernel worker thread
>> >>to be managed. For example in the iscsi case today a user can
>> >>put iscsid in a netprio cgroup and control traffic will be sent
>> >>with the correct sk_cgrp_prioidx value set but as soon as data
>> >>is sent the kernel worker thread isssues a send and sk_cgrp_prioidx
>> >>is updated with the kernel worker threads value which is the
>> >>default case.
>> >>
>> >>It seems more correct to only update the field when the user
>> >>explicitly sets it via control group infrastructure. This allows
>> >>the users to manage sockets that may be used with other threads.
>> >>
>> >>Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> >I like the idea, but IIRC last time we tried this I think it caused problems
>> >with processes that shared sockets.  That is to say, if you have a parent and
>> >child process that dup an socket descriptior, and put them in separate cgroups,
>> >you get unpredictable results, as the socket gets assigned a priority based on
>> >the last processed that moved cgroups.
>> >
>> >Neil
>> >
>> 
>> Shared sockets creates strange behavior as it exists today. If a dup
>> of the socket fd is created the private data is still shared right. So
>> in this case the sk_cgrp_prioidx value is going to get updated by both
>> threads and then it is a race to see what it happens to be set to in
>> the xmit path.
>> 
>> With this patch at least the behavior is deterministic. Without it
>> I can create the above scenario but have no way to determine what the
>> skb priority will actually be set to.
>> 
>> .John
>> 
> Ok, I can buy that.  Lets give this a try:
> 
> Acked-by: Neil Horman <nhorman@tuxdriver.com>
> 

Applied.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-07-22 19:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-20 20:39 [net-next PATCH v1] net: netprio_cgroup: rework update socket logic John Fastabend
2012-07-21  2:00 ` Neil Horman
2012-07-21 17:02   ` John Fastabend
2012-07-21 17:18     ` Neil Horman
2012-07-22 19:44       ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.