All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
@ 2008-09-08  1:28 Alexey Dobriyan
  2008-09-08  9:35 ` Daniel Lezcano
       [not found] ` <20080908012855.GC575-2ev+ksY9ol182hYKe6nXyg@public.gmane.org>
  0 siblings, 2 replies; 11+ messages in thread
From: Alexey Dobriyan @ 2008-09-08  1:28 UTC (permalink / raw)
  To: davem; +Cc: netdev, containers

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 net/ipv4/igmp.c |   49 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2272,6 +2272,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 
 #if defined(CONFIG_PROC_FS)
 struct igmp_mc_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *in_dev;
 };
@@ -2280,11 +2281,12 @@ struct igmp_mc_iter_state {
 
 static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_mc_list *im = NULL;
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 
 	state->in_dev = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *in_dev;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
@@ -2405,7 +2407,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
 
 static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mc_seq_ops,
+	return seq_open_net(inode, file, &igmp_mc_seq_ops,
 			sizeof(struct igmp_mc_iter_state));
 }
 
@@ -2414,10 +2416,11 @@ static const struct file_operations igmp_mc_seq_fops = {
 	.open		=	igmp_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 struct igmp_mcf_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *idev;
 	struct ip_mc_list *im;
@@ -2427,13 +2430,14 @@ struct igmp_mcf_iter_state {
 
 static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_sf_list *psf = NULL;
 	struct ip_mc_list *im = NULL;
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *idev;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
@@ -2564,7 +2568,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
 
 static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mcf_seq_ops,
+	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
 			sizeof(struct igmp_mcf_iter_state));
 }
 
@@ -2573,14 +2577,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
 	.open		=	igmp_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
-int __init igmp_mc_proc_init(void)
+static int igmp_net_init(struct net *net)
 {
-	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
+	if (!pde)
+		goto out_igmp;
+	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	if (!pde)
+		goto out_mcfilter;
 	return 0;
+
+out_mcfilter:
+	proc_net_remove(net, "igmp");
+out_igmp:
+	return -ENOMEM;
+}
+
+static void igmp_net_exit(struct net *net)
+{
+	proc_net_remove(net, "mcfilter");
+	proc_net_remove(net, "igmp");
+}
+
+static struct pernet_operations igmp_net_ops = {
+	.init = igmp_net_init,
+	.exit = igmp_net_exit,
+};
+
+int __init igmp_mc_proc_init(void)
+{
+	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif
 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
       [not found] ` <20080908012855.GC575-2ev+ksY9ol182hYKe6nXyg@public.gmane.org>
@ 2008-09-08  9:35   ` Daniel Lezcano
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Lezcano @ 2008-09-08  9:35 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q

Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

I have exactly the same patch to be submitted :)

Acked-by: Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

By the way, you are posting patches related to netns. As some of us are 
working in this area too, do you mind to sync with us for the network 
items you want to handle to avoid duplicate work ?

We are currently working on mroute[6].

Thanks.
   -- Daniel

> ---
> 
>  net/ipv4/igmp.c |   49 ++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 9 deletions(-)
> 
> --- a/net/ipv4/igmp.c
> +++ b/net/ipv4/igmp.c
> @@ -2272,6 +2272,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
>  
>  #if defined(CONFIG_PROC_FS)
>  struct igmp_mc_iter_state {
> +	struct seq_net_private p;
>  	struct net_device *dev;
>  	struct in_device *in_dev;
>  };
> @@ -2280,11 +2281,12 @@ struct igmp_mc_iter_state {
>  
>  static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
>  {
> +	struct net *net = seq_file_net(seq);
>  	struct ip_mc_list *im = NULL;
>  	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
>  
>  	state->in_dev = NULL;
> -	for_each_netdev(&init_net, state->dev) {
> +	for_each_netdev(net, state->dev) {
>  		struct in_device *in_dev;
>  		in_dev = in_dev_get(state->dev);
>  		if (!in_dev)
> @@ -2405,7 +2407,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
>  
>  static int igmp_mc_seq_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open_private(file, &igmp_mc_seq_ops,
> +	return seq_open_net(inode, file, &igmp_mc_seq_ops,
>  			sizeof(struct igmp_mc_iter_state));
>  }
>  
> @@ -2414,10 +2416,11 @@ static const struct file_operations igmp_mc_seq_fops = {
>  	.open		=	igmp_mc_seq_open,
>  	.read		=	seq_read,
>  	.llseek		=	seq_lseek,
> -	.release	=	seq_release_private,
> +	.release	=	seq_release_net,
>  };
>  
>  struct igmp_mcf_iter_state {
> +	struct seq_net_private p;
>  	struct net_device *dev;
>  	struct in_device *idev;
>  	struct ip_mc_list *im;
> @@ -2427,13 +2430,14 @@ struct igmp_mcf_iter_state {
>  
>  static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
>  {
> +	struct net *net = seq_file_net(seq);
>  	struct ip_sf_list *psf = NULL;
>  	struct ip_mc_list *im = NULL;
>  	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
>  
>  	state->idev = NULL;
>  	state->im = NULL;
> -	for_each_netdev(&init_net, state->dev) {
> +	for_each_netdev(net, state->dev) {
>  		struct in_device *idev;
>  		idev = in_dev_get(state->dev);
>  		if (unlikely(idev == NULL))
> @@ -2564,7 +2568,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
>  
>  static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open_private(file, &igmp_mcf_seq_ops,
> +	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
>  			sizeof(struct igmp_mcf_iter_state));
>  }
>  
> @@ -2573,14 +2577,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
>  	.open		=	igmp_mcf_seq_open,
>  	.read		=	seq_read,
>  	.llseek		=	seq_lseek,
> -	.release	=	seq_release_private,
> +	.release	=	seq_release_net,
>  };
>  
> -int __init igmp_mc_proc_init(void)
> +static int igmp_net_init(struct net *net)
>  {
> -	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
> -	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
> +	struct proc_dir_entry *pde;
> +
> +	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
> +	if (!pde)
> +		goto out_igmp;
> +	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
> +	if (!pde)
> +		goto out_mcfilter;
>  	return 0;
> +
> +out_mcfilter:
> +	proc_net_remove(net, "igmp");
> +out_igmp:
> +	return -ENOMEM;
> +}
> +
> +static void igmp_net_exit(struct net *net)
> +{
> +	proc_net_remove(net, "mcfilter");
> +	proc_net_remove(net, "igmp");
> +}
> +
> +static struct pernet_operations igmp_net_ops = {
> +	.init = igmp_net_init,
> +	.exit = igmp_net_exit,
> +};
> +
> +int __init igmp_mc_proc_init(void)
> +{
> +	return register_pernet_subsys(&igmp_net_ops);
>  }
>  #endif

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
  2008-09-08  1:28 [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns Alexey Dobriyan
@ 2008-09-08  9:35 ` Daniel Lezcano
  2008-09-08 18:17   ` David Stevens
       [not found]   ` <48C4F1C6.6080605-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
       [not found] ` <20080908012855.GC575-2ev+ksY9ol182hYKe6nXyg@public.gmane.org>
  1 sibling, 2 replies; 11+ messages in thread
From: Daniel Lezcano @ 2008-09-08  9:35 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: davem, netdev, containers

Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>

I have exactly the same patch to be submitted :)

Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>

By the way, you are posting patches related to netns. As some of us are 
working in this area too, do you mind to sync with us for the network 
items you want to handle to avoid duplicate work ?

We are currently working on mroute[6].

Thanks.
   -- Daniel

> ---
> 
>  net/ipv4/igmp.c |   49 ++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 9 deletions(-)
> 
> --- a/net/ipv4/igmp.c
> +++ b/net/ipv4/igmp.c
> @@ -2272,6 +2272,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
>  
>  #if defined(CONFIG_PROC_FS)
>  struct igmp_mc_iter_state {
> +	struct seq_net_private p;
>  	struct net_device *dev;
>  	struct in_device *in_dev;
>  };
> @@ -2280,11 +2281,12 @@ struct igmp_mc_iter_state {
>  
>  static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
>  {
> +	struct net *net = seq_file_net(seq);
>  	struct ip_mc_list *im = NULL;
>  	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
>  
>  	state->in_dev = NULL;
> -	for_each_netdev(&init_net, state->dev) {
> +	for_each_netdev(net, state->dev) {
>  		struct in_device *in_dev;
>  		in_dev = in_dev_get(state->dev);
>  		if (!in_dev)
> @@ -2405,7 +2407,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
>  
>  static int igmp_mc_seq_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open_private(file, &igmp_mc_seq_ops,
> +	return seq_open_net(inode, file, &igmp_mc_seq_ops,
>  			sizeof(struct igmp_mc_iter_state));
>  }
>  
> @@ -2414,10 +2416,11 @@ static const struct file_operations igmp_mc_seq_fops = {
>  	.open		=	igmp_mc_seq_open,
>  	.read		=	seq_read,
>  	.llseek		=	seq_lseek,
> -	.release	=	seq_release_private,
> +	.release	=	seq_release_net,
>  };
>  
>  struct igmp_mcf_iter_state {
> +	struct seq_net_private p;
>  	struct net_device *dev;
>  	struct in_device *idev;
>  	struct ip_mc_list *im;
> @@ -2427,13 +2430,14 @@ struct igmp_mcf_iter_state {
>  
>  static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
>  {
> +	struct net *net = seq_file_net(seq);
>  	struct ip_sf_list *psf = NULL;
>  	struct ip_mc_list *im = NULL;
>  	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
>  
>  	state->idev = NULL;
>  	state->im = NULL;
> -	for_each_netdev(&init_net, state->dev) {
> +	for_each_netdev(net, state->dev) {
>  		struct in_device *idev;
>  		idev = in_dev_get(state->dev);
>  		if (unlikely(idev == NULL))
> @@ -2564,7 +2568,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
>  
>  static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
>  {
> -	return seq_open_private(file, &igmp_mcf_seq_ops,
> +	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
>  			sizeof(struct igmp_mcf_iter_state));
>  }
>  
> @@ -2573,14 +2577,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
>  	.open		=	igmp_mcf_seq_open,
>  	.read		=	seq_read,
>  	.llseek		=	seq_lseek,
> -	.release	=	seq_release_private,
> +	.release	=	seq_release_net,
>  };
>  
> -int __init igmp_mc_proc_init(void)
> +static int igmp_net_init(struct net *net)
>  {
> -	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
> -	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
> +	struct proc_dir_entry *pde;
> +
> +	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
> +	if (!pde)
> +		goto out_igmp;
> +	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
> +	if (!pde)
> +		goto out_mcfilter;
>  	return 0;
> +
> +out_mcfilter:
> +	proc_net_remove(net, "igmp");
> +out_igmp:
> +	return -ENOMEM;
> +}
> +
> +static void igmp_net_exit(struct net *net)
> +{
> +	proc_net_remove(net, "mcfilter");
> +	proc_net_remove(net, "igmp");
> +}
> +
> +static struct pernet_operations igmp_net_ops = {
> +	.init = igmp_net_init,
> +	.exit = igmp_net_exit,
> +};
> +
> +int __init igmp_mc_proc_init(void)
> +{
> +	return register_pernet_subsys(&igmp_net_ops);
>  }
>  #endif


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
       [not found]   ` <48C4F1C6.6080605-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2008-09-08 18:17     ` David Stevens
  0 siblings, 0 replies; 11+ messages in thread
From: David Stevens @ 2008-09-08 18:17 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Alexey Dobriyan, netdev-owner-u79uwXL29TY76Z2rM5mHXA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q

As I've said before, I really don't like the model you're
using for multicasting here (if I understand correctly, and
I shamelessly admit I haven't looked at this code in detail).

As I understand it, you're modelling the multiple virtual interfaces
as different pieces of hardware on the same physical network.
The implication is that apps joining the same group in multiple
containers will result in multiple advertisements for the same
group, from each of the multiple instances of IGMP & MLD.

In IPv4, that's just ineffecient. In IPv6, the question is: do you have
multiple link-local addresses-- one for each virtual device?
If not, then MLD will be sending multiple copies of everything in
violation of the spec (since they'll be from the same source, too).

I think IGMP and MLD both belong with the physical interface, since
they pretty much do exactly what you want already: glom all the
different filters and group memberships together into exactly the
minimal set of group memberships needed for everyone to hear
just the pieces they've requested.

If you do that at the interface, then you won't have any duplicated
traffic on the physical net and you can separate copies as needed
for the different virtual nets on the host. Perfect, and indistinguishable
externally from a non-container machine (and the code to do it is
already in IGMP and MLD).

If you treat them as separate physical devices all the way to the
wire, then you're just needlessly increasing the host processing
you need to do, as well as loading the multicast routers and network
that are unfortunate enough to be on the same network as you are.

I haven't been paying attention, so I'll be happy if you tell me you've
already addressed this. :-) Otherwise, I think it'd be wise to do so
before it's released into the wild and can't be easily changed.

                                                +-DLS

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
  2008-09-08  9:35 ` Daniel Lezcano
@ 2008-09-08 18:17   ` David Stevens
  2008-09-11 17:15     ` Daniel Lezcano
       [not found]     ` <OFAF5F99D2.464B2693-ON882574BE.00630917-882574BE.0064824E-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
       [not found]   ` <48C4F1C6.6080605-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  1 sibling, 2 replies; 11+ messages in thread
From: David Stevens @ 2008-09-08 18:17 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: Alexey Dobriyan, containers, davem, netdev, netdev-owner

As I've said before, I really don't like the model you're
using for multicasting here (if I understand correctly, and
I shamelessly admit I haven't looked at this code in detail).

As I understand it, you're modelling the multiple virtual interfaces
as different pieces of hardware on the same physical network.
The implication is that apps joining the same group in multiple
containers will result in multiple advertisements for the same
group, from each of the multiple instances of IGMP & MLD.

In IPv4, that's just ineffecient. In IPv6, the question is: do you have
multiple link-local addresses-- one for each virtual device?
If not, then MLD will be sending multiple copies of everything in
violation of the spec (since they'll be from the same source, too).

I think IGMP and MLD both belong with the physical interface, since
they pretty much do exactly what you want already: glom all the
different filters and group memberships together into exactly the
minimal set of group memberships needed for everyone to hear
just the pieces they've requested.

If you do that at the interface, then you won't have any duplicated
traffic on the physical net and you can separate copies as needed
for the different virtual nets on the host. Perfect, and indistinguishable
externally from a non-container machine (and the code to do it is
already in IGMP and MLD).

If you treat them as separate physical devices all the way to the
wire, then you're just needlessly increasing the host processing
you need to do, as well as loading the multicast routers and network
that are unfortunate enough to be on the same network as you are.

I haven't been paying attention, so I'll be happy if you tell me you've
already addressed this. :-) Otherwise, I think it'd be wise to do so
before it's released into the wild and can't be easily changed.

                                                +-DLS


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
       [not found]     ` <OFAF5F99D2.464B2693-ON882574BE.00630917-882574BE.0064824E-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2008-09-11 17:15       ` Daniel Lezcano
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Lezcano @ 2008-09-11 17:15 UTC (permalink / raw)
  To: David Stevens
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Alexey Dobriyan, davem-fT/PcQaiUtIeIZ0/mPfg9Q

David Stevens wrote:
> As I've said before, I really don't like the model you're
> using for multicasting here (if I understand correctly, and
> I shamelessly admit I haven't looked at this code in detail).


Hi David,

Sorry for the delay.

> As I understand it, you're modelling the multiple virtual interfaces
> as different pieces of hardware on the same physical network.

Exact. The network namespace acts at the layer 2 level. The network 
resources are isolated and accessed relatively from the namespace 
instead of a global static variable. For example, the network device 
list is per namespace as well as the loopback.

The network devices belong to a specific namespace and can not be used, 
neither seen from another namespace. How the network namespace is able 
to discuss with the outside world will depends on the inter container 
network configuration: a physical device can be assigned to a network 
namespace, or a system with a bridge + a physical network device + one 
side of a pair device (having the other side to the namespace), or a 
macvlan assigned to a namespace, or 'nat' with a pair device, or a 
tunnel, etc ...

> The implication is that apps joining the same group in multiple
> containers will result in multiple advertisements for the same
> group, from each of the multiple instances of IGMP & MLD.
> In IPv4, that's just ineffecient.

I agree.

 > In IPv6, the question is: do you have
> multiple link-local addresses-- one for each virtual device?
> If not, then MLD will be sending multiple copies of everything in
> violation of the spec (since they'll be from the same source, too).

Yes, each virtual device has its own set of network resources, so when 
it is activated in the namespace, the link local address is computed, 
the DAD is invoked and the ip is set on the device.

> I think IGMP and MLD both belong with the physical interface, since
> they pretty much do exactly what you want already: glom all the
> different filters and group memberships together into exactly the
> minimal set of group memberships needed for everyone to hear
> just the pieces they've requested.
> If you do that at the interface, then you won't have any duplicated
> traffic on the physical net and you can separate copies as needed
> for the different virtual nets on the host. Perfect, and indistinguishable
> externally from a non-container machine (and the code to do it is
> already in IGMP and MLD).
> 
> If you treat them as separate physical devices all the way to the
> wire, then you're just needlessly increasing the host processing
> you need to do, as well as loading the multicast routers and network
> that are unfortunate enough to be on the same network as you are.

That makes sense, but the containers can be configured to have a network 
inside the host which acts like a router, a kind of an internal cluster 
in the host, I want to have each container to send an mcast report to 
reproduce the real behaviour of a physical network.

> I haven't been paying attention, so I'll be happy if you tell me you've
> already addressed this. :-) Otherwise, I think it'd be wise to do so
> before it's released into the wild and can't be easily changed.

No, you are right, I didn't addressed that. I thought we stated that was 
an optimization which can be done later.

I don't think having for N containers, N reports for joining / leaving a 
group is something critical at this point, IMHO we can live with that 
for now.

The critical point is : the protocol must not be violated and AFAICS 
this is the case, right ?

Your points are totally valid and I agree 100% with you. But as you can 
see this optimization is not trivial to realize because we have to take 
into account different use cases of the network namespaces and have the 
network stack to behave in a clever way depending on the report to be 
sent internally in the host each time or externally one time.

I will add this optimization to my huge TODO list :) The only question 
is where should I put it, at the beginning or at the end of the list ?
If you think I missed something and there is something wrong with the 
actual approach (expect it can be more efficient) and it is critical for 
the kernel / the protocol, just let me know and I will go to your 
suggestion.


Thanks for your feedback.

   -- Daniel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
  2008-09-08 18:17   ` David Stevens
@ 2008-09-11 17:15     ` Daniel Lezcano
  2008-09-11 19:59       ` David Stevens
       [not found]       ` <48C9522B.8080002-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
       [not found]     ` <OFAF5F99D2.464B2693-ON882574BE.00630917-882574BE.0064824E-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  1 sibling, 2 replies; 11+ messages in thread
From: Daniel Lezcano @ 2008-09-11 17:15 UTC (permalink / raw)
  To: David Stevens; +Cc: Alexey Dobriyan, containers, davem, netdev

David Stevens wrote:
> As I've said before, I really don't like the model you're
> using for multicasting here (if I understand correctly, and
> I shamelessly admit I haven't looked at this code in detail).


Hi David,

Sorry for the delay.

> As I understand it, you're modelling the multiple virtual interfaces
> as different pieces of hardware on the same physical network.

Exact. The network namespace acts at the layer 2 level. The network 
resources are isolated and accessed relatively from the namespace 
instead of a global static variable. For example, the network device 
list is per namespace as well as the loopback.

The network devices belong to a specific namespace and can not be used, 
neither seen from another namespace. How the network namespace is able 
to discuss with the outside world will depends on the inter container 
network configuration: a physical device can be assigned to a network 
namespace, or a system with a bridge + a physical network device + one 
side of a pair device (having the other side to the namespace), or a 
macvlan assigned to a namespace, or 'nat' with a pair device, or a 
tunnel, etc ...

> The implication is that apps joining the same group in multiple
> containers will result in multiple advertisements for the same
> group, from each of the multiple instances of IGMP & MLD.
> In IPv4, that's just ineffecient.

I agree.

 > In IPv6, the question is: do you have
> multiple link-local addresses-- one for each virtual device?
> If not, then MLD will be sending multiple copies of everything in
> violation of the spec (since they'll be from the same source, too).

Yes, each virtual device has its own set of network resources, so when 
it is activated in the namespace, the link local address is computed, 
the DAD is invoked and the ip is set on the device.

> I think IGMP and MLD both belong with the physical interface, since
> they pretty much do exactly what you want already: glom all the
> different filters and group memberships together into exactly the
> minimal set of group memberships needed for everyone to hear
> just the pieces they've requested.
> If you do that at the interface, then you won't have any duplicated
> traffic on the physical net and you can separate copies as needed
> for the different virtual nets on the host. Perfect, and indistinguishable
> externally from a non-container machine (and the code to do it is
> already in IGMP and MLD).
> 
> If you treat them as separate physical devices all the way to the
> wire, then you're just needlessly increasing the host processing
> you need to do, as well as loading the multicast routers and network
> that are unfortunate enough to be on the same network as you are.

That makes sense, but the containers can be configured to have a network 
inside the host which acts like a router, a kind of an internal cluster 
in the host, I want to have each container to send an mcast report to 
reproduce the real behaviour of a physical network.

> I haven't been paying attention, so I'll be happy if you tell me you've
> already addressed this. :-) Otherwise, I think it'd be wise to do so
> before it's released into the wild and can't be easily changed.

No, you are right, I didn't addressed that. I thought we stated that was 
an optimization which can be done later.

I don't think having for N containers, N reports for joining / leaving a 
group is something critical at this point, IMHO we can live with that 
for now.

The critical point is : the protocol must not be violated and AFAICS 
this is the case, right ?

Your points are totally valid and I agree 100% with you. But as you can 
see this optimization is not trivial to realize because we have to take 
into account different use cases of the network namespaces and have the 
network stack to behave in a clever way depending on the report to be 
sent internally in the host each time or externally one time.

I will add this optimization to my huge TODO list :) The only question 
is where should I put it, at the beginning or at the end of the list ?
If you think I missed something and there is something wrong with the 
actual approach (expect it can be more efficient) and it is critical for 
the kernel / the protocol, just let me know and I will go to your 
suggestion.


Thanks for your feedback.

   -- Daniel





^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
       [not found]       ` <48C9522B.8080002-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2008-09-11 19:59         ` David Stevens
  0 siblings, 0 replies; 11+ messages in thread
From: David Stevens @ 2008-09-11 19:59 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Alexey Dobriyan, netdev-owner-u79uwXL29TY76Z2rM5mHXA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q

Daniel,
        Thanks for the detailed response.

        As long as you have distinct link-local addresses for
each advertiser, I don't see any protocol violations here.

        I don't believe it's all that difficult to do-- the main
thing is simply to push container joins down to the physical
device-- basically don't do any IGMP or MLD processing
in the virtual layer and just pass all that to the device. This
is essentially what is done  already to get multiple sockets
with distinct joins and filter sets boiled down to the right set
of listens to satisfy everything requested.
        Because the link-local addresses are distinct in v6,
it'll work for now, but be a little harder to do there than blindly
passing all advertisements to the device. Instead, you'd want
a shim layer at the device to intercept them and translate them
into joins for the physcal device, which would itself generate
the advertisements.
        Either way you'd want to demultiplex inbound to the
right container.
        I'm interested in looking at this, but I don't own my
time. Feel free to contact me when  you get started and I can
at least review, if not contribute on it.

        I suppose I won't lose sleep at night over it, but
it does mean use of multicasting in containers won't scale
very well until it's addressed.

                                        +-DLS

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
  2008-09-11 17:15     ` Daniel Lezcano
@ 2008-09-11 19:59       ` David Stevens
       [not found]       ` <48C9522B.8080002-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  1 sibling, 0 replies; 11+ messages in thread
From: David Stevens @ 2008-09-11 19:59 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: Alexey Dobriyan, containers, davem, netdev, netdev-owner

Daniel,
        Thanks for the detailed response.

        As long as you have distinct link-local addresses for
each advertiser, I don't see any protocol violations here.

        I don't believe it's all that difficult to do-- the main
thing is simply to push container joins down to the physical
device-- basically don't do any IGMP or MLD processing
in the virtual layer and just pass all that to the device. This
is essentially what is done  already to get multiple sockets
with distinct joins and filter sets boiled down to the right set
of listens to satisfy everything requested.
        Because the link-local addresses are distinct in v6,
it'll work for now, but be a little harder to do there than blindly
passing all advertisements to the device. Instead, you'd want
a shim layer at the device to intercept them and translate them
into joins for the physcal device, which would itself generate
the advertisements.
        Either way you'd want to demultiplex inbound to the
right container.
        I'm interested in looking at this, but I don't own my
time. Feel free to contact me when  you get started and I can
at least review, if not contribute on it.

        I suppose I won't lose sleep at night over it, but
it does mean use of multicasting in containers won't scale
very well until it's addressed.

                                        +-DLS


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
@ 2008-09-08  1:28 Alexey Dobriyan
  0 siblings, 0 replies; 11+ messages in thread
From: Alexey Dobriyan @ 2008-09-08  1:28 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Signed-off-by: Alexey Dobriyan <adobriyan-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---

 net/ipv4/igmp.c |   49 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2272,6 +2272,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 
 #if defined(CONFIG_PROC_FS)
 struct igmp_mc_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *in_dev;
 };
@@ -2280,11 +2281,12 @@ struct igmp_mc_iter_state {
 
 static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_mc_list *im = NULL;
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 
 	state->in_dev = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *in_dev;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
@@ -2405,7 +2407,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
 
 static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mc_seq_ops,
+	return seq_open_net(inode, file, &igmp_mc_seq_ops,
 			sizeof(struct igmp_mc_iter_state));
 }
 
@@ -2414,10 +2416,11 @@ static const struct file_operations igmp_mc_seq_fops = {
 	.open		=	igmp_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 struct igmp_mcf_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *idev;
 	struct ip_mc_list *im;
@@ -2427,13 +2430,14 @@ struct igmp_mcf_iter_state {
 
 static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_sf_list *psf = NULL;
 	struct ip_mc_list *im = NULL;
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *idev;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
@@ -2564,7 +2568,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
 
 static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mcf_seq_ops,
+	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
 			sizeof(struct igmp_mcf_iter_state));
 }
 
@@ -2573,14 +2577,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
 	.open		=	igmp_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
-int __init igmp_mc_proc_init(void)
+static int igmp_net_init(struct net *net)
 {
-	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
+	if (!pde)
+		goto out_igmp;
+	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	if (!pde)
+		goto out_mcfilter;
 	return 0;
+
+out_mcfilter:
+	proc_net_remove(net, "igmp");
+out_igmp:
+	return -ENOMEM;
+}
+
+static void igmp_net_exit(struct net *net)
+{
+	proc_net_remove(net, "mcfilter");
+	proc_net_remove(net, "igmp");
+}
+
+static struct pernet_operations igmp_net_ops = {
+	.init = igmp_net_init,
+	.exit = igmp_net_exit,
+};
+
+int __init igmp_mc_proc_init(void)
+{
+	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns
@ 2008-09-08  0:35 Alexey Dobriyan
  0 siblings, 0 replies; 11+ messages in thread
From: Alexey Dobriyan @ 2008-09-08  0:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, containters

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 net/ipv4/igmp.c |   49 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2272,6 +2272,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 
 #if defined(CONFIG_PROC_FS)
 struct igmp_mc_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *in_dev;
 };
@@ -2280,11 +2281,12 @@ struct igmp_mc_iter_state {
 
 static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_mc_list *im = NULL;
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 
 	state->in_dev = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *in_dev;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
@@ -2405,7 +2407,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
 
 static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mc_seq_ops,
+	return seq_open_net(inode, file, &igmp_mc_seq_ops,
 			sizeof(struct igmp_mc_iter_state));
 }
 
@@ -2414,10 +2416,11 @@ static const struct file_operations igmp_mc_seq_fops = {
 	.open		=	igmp_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 struct igmp_mcf_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *idev;
 	struct ip_mc_list *im;
@@ -2427,13 +2430,14 @@ struct igmp_mcf_iter_state {
 
 static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_sf_list *psf = NULL;
 	struct ip_mc_list *im = NULL;
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(&init_net, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *idev;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
@@ -2564,7 +2568,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
 
 static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mcf_seq_ops,
+	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
 			sizeof(struct igmp_mcf_iter_state));
 }
 
@@ -2573,14 +2577,41 @@ static const struct file_operations igmp_mcf_seq_fops = {
 	.open		=	igmp_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
-int __init igmp_mc_proc_init(void)
+static int igmp_net_init(struct net *net)
 {
-	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
-	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
+	if (!pde)
+		goto out_igmp;
+	pde = proc_net_fops_create(net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+	if (!pde)
+		goto out_mcfilter;
 	return 0;
+
+out_mcfilter:
+	proc_net_remove(net, "igmp");
+out_igmp:
+	return -ENOMEM;
+}
+
+static void igmp_net_exit(struct net *net)
+{
+	proc_net_remove(net, "mcfilter");
+	proc_net_remove(net, "igmp");
+}
+
+static struct pernet_operations igmp_net_ops = {
+	.init = igmp_net_init,
+	.exit = igmp_net_exit,
+};
+
+int __init igmp_mc_proc_init(void)
+{
+	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif
 


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-09-11 19:59 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-08  1:28 [PATCH] igmp: make /proc/net/{igmp,mcfilter} per netns Alexey Dobriyan
2008-09-08  9:35 ` Daniel Lezcano
2008-09-08 18:17   ` David Stevens
2008-09-11 17:15     ` Daniel Lezcano
2008-09-11 19:59       ` David Stevens
     [not found]       ` <48C9522B.8080002-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2008-09-11 19:59         ` David Stevens
     [not found]     ` <OFAF5F99D2.464B2693-ON882574BE.00630917-882574BE.0064824E-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-09-11 17:15       ` Daniel Lezcano
     [not found]   ` <48C4F1C6.6080605-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2008-09-08 18:17     ` David Stevens
     [not found] ` <20080908012855.GC575-2ev+ksY9ol182hYKe6nXyg@public.gmane.org>
2008-09-08  9:35   ` Daniel Lezcano
  -- strict thread matches above, loose matches on Subject: below --
2008-09-08  1:28 Alexey Dobriyan
2008-09-08  0:35 Alexey Dobriyan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.