All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ipv6: enable net.ipv6.route.max_size sysctl in network namespace
@ 2021-10-12  7:39 Alexander Kuznetsov
  2021-10-12 16:38 ` Eric W. Biederman
  0 siblings, 1 reply; 2+ messages in thread
From: Alexander Kuznetsov @ 2021-10-12  7:39 UTC (permalink / raw)
  To: netdev; +Cc: zeil, davem, ebiederm, dmtrmonakhov

We want to increase route cache size in network namespace
created with user namespace. Currently ipv6 route settings
are disabled for non-initial network namespaces.

We want to allow to write this sysctl only for users
from host(initial) user ns.

Signed-off-by: Alexander Kuznetsov <wwfq@yandex-team.ru>
Acked-by: Dmitry Yakunin <zeil@yandex-team.ru>
Acked-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
---
 net/ipv6/route.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbc2240..2d96c9f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6303,13 +6303,21 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 	return 0;
 }
 
+static int ipv6_sysctl_route_max_size(struct ctl_table *ctl, int write,
+				       void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write && current_user_ns() != &init_user_ns)
+		return -EPERM;
+
+	return proc_dointvec(ctl, write, buffer, lenp, ppos);
+}
 static struct ctl_table ipv6_route_table_template[] = {
 	{
-		.procname	=	"flush",
-		.data		=	&init_net.ipv6.sysctl.flush_delay,
+		.procname	=	"max_size",
+		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
 		.maxlen		=	sizeof(int),
-		.mode		=	0200,
-		.proc_handler	=	ipv6_sysctl_rtcache_flush
+		.mode		=	0644,
+		.proc_handler	=	ipv6_sysctl_route_max_size,
 	},
 	{
 		.procname	=	"gc_thresh",
@@ -6319,11 +6327,11 @@ static struct ctl_table ipv6_route_table_template[] = {
 		.proc_handler	=	proc_dointvec,
 	},
 	{
-		.procname	=	"max_size",
-		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
+		.procname	=	"flush",
+		.data		=	&init_net.ipv6.sysctl.flush_delay,
 		.maxlen		=	sizeof(int),
-		.mode		=	0644,
-		.proc_handler	=	proc_dointvec,
+		.mode		=	0200,
+		.proc_handler	=	ipv6_sysctl_rtcache_flush
 	},
 	{
 		.procname	=	"gc_min_interval",
@@ -6395,10 +6403,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 			GFP_KERNEL);
 
 	if (table) {
-		table[0].data = &net->ipv6.sysctl.flush_delay;
-		table[0].extra1 = net;
+		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
-		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+		table[2].data = &net->ipv6.sysctl.flush_delay;
+		table[2].extra1 = net;
 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
@@ -6410,7 +6418,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
-			table[0].procname = NULL;
+			table[1].procname = NULL;
 	}
 
 	return table;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] ipv6: enable net.ipv6.route.max_size sysctl in network namespace
  2021-10-12  7:39 [PATCH] ipv6: enable net.ipv6.route.max_size sysctl in network namespace Alexander Kuznetsov
@ 2021-10-12 16:38 ` Eric W. Biederman
  0 siblings, 0 replies; 2+ messages in thread
From: Eric W. Biederman @ 2021-10-12 16:38 UTC (permalink / raw)
  To: Alexander Kuznetsov; +Cc: netdev, zeil, davem, dmtrmonakhov

Alexander Kuznetsov <wwfq@yandex-team.ru> writes:

> We want to increase route cache size in network namespace
> created with user namespace. Currently ipv6 route settings
> are disabled for non-initial network namespaces.
>
> We want to allow to write this sysctl only for users
> from host(initial) user ns.

This is subtle, and arguably broken.  Having permission tests
inside write methods has been proven problematic over the years.

That is because it usually is pretty simple to fool some application
that has the appropriate permissions to write to a file descriptor
it did not open.


From what I can see you are doing this convoluted test to avoid
performing the analysis to see if it is safe to allow the route
cache size to be changed from inside the namespace.


Usually limits like this exist more as to catch it when things
go crazy rather than to limit resource consumption in general.

So I expect it is reasonable to relax this limit possibly after
ensuring you have memory cgroup annotation on the allocations.

I suspect the routing table can grow large enough memory cgroup
annotations need to be present already.


Eric


> Signed-off-by: Alexander Kuznetsov <wwfq@yandex-team.ru>
> Acked-by: Dmitry Yakunin <zeil@yandex-team.ru>
> Acked-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
> ---
>  net/ipv6/route.c | 32 ++++++++++++++++++++------------
>  1 file changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index dbc2240..2d96c9f 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -6303,13 +6303,21 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
>  	return 0;
>  }
>  
> +static int ipv6_sysctl_route_max_size(struct ctl_table *ctl, int write,
> +				       void *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	if (write && current_user_ns() != &init_user_ns)
> +		return -EPERM;
> +
> +	return proc_dointvec(ctl, write, buffer, lenp, ppos);
> +}
>  static struct ctl_table ipv6_route_table_template[] = {
>  	{
> -		.procname	=	"flush",
> -		.data		=	&init_net.ipv6.sysctl.flush_delay,
> +		.procname	=	"max_size",
> +		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
>  		.maxlen		=	sizeof(int),
> -		.mode		=	0200,
> -		.proc_handler	=	ipv6_sysctl_rtcache_flush
> +		.mode		=	0644,
> +		.proc_handler	=	ipv6_sysctl_route_max_size,
>  	},
>  	{
>  		.procname	=	"gc_thresh",
> @@ -6319,11 +6327,11 @@ static struct ctl_table ipv6_route_table_template[] = {
>  		.proc_handler	=	proc_dointvec,
>  	},
>  	{
> -		.procname	=	"max_size",
> -		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
> +		.procname	=	"flush",
> +		.data		=	&init_net.ipv6.sysctl.flush_delay,
>  		.maxlen		=	sizeof(int),
> -		.mode		=	0644,
> -		.proc_handler	=	proc_dointvec,
> +		.mode		=	0200,
> +		.proc_handler	=	ipv6_sysctl_rtcache_flush
>  	},
>  	{
>  		.procname	=	"gc_min_interval",
> @@ -6395,10 +6403,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
>  			GFP_KERNEL);
>  
>  	if (table) {
> -		table[0].data = &net->ipv6.sysctl.flush_delay;
> -		table[0].extra1 = net;
> +		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
>  		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
> -		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
> +		table[2].data = &net->ipv6.sysctl.flush_delay;
> +		table[2].extra1 = net;
>  		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
>  		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
>  		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
> @@ -6410,7 +6418,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
>  
>  		/* Don't export sysctls to unprivileged users */
>  		if (net->user_ns != &init_user_ns)
> -			table[0].procname = NULL;
> +			table[1].procname = NULL;
>  	}
>  
>  	return table;

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-10-12 16:39 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-12  7:39 [PATCH] ipv6: enable net.ipv6.route.max_size sysctl in network namespace Alexander Kuznetsov
2021-10-12 16:38 ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.