* [net-next RFC 1/4] bindtoprefix: infrastructure
2016-03-23 2:26 [net-next RFC 0/4] SO_BINDTOPREFIX Gilberto Bertin
@ 2016-03-23 2:26 ` Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 2/4] bindtoprefix: TCP/IPv4 implementation Gilberto Bertin
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Gilberto Bertin @ 2016-03-23 2:26 UTC (permalink / raw)
To: netdev; +Cc: tom, markzzzsmith, Gilberto Bertin
Signed-off-by: Gilberto Bertin <gilberto.bertin@gmail.com>
---
include/net/sock.h | 20 +++++++
include/uapi/asm-generic/socket.h | 1 +
net/core/sock.c | 111 ++++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index f5ea148..409d255 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -109,6 +109,16 @@ typedef struct {
#endif
} socket_lock_t;
+struct ipv4_prefix { /* IPv4 prefix recorded on a socket by SO_BINDTOPREFIX */
+ __be32 net; /* prefix address; typed __be32, so presumably network byte order */
+ u_char plen; /* prefix length in bits; the setsockopt path rejects values above 32 */
+};
+
+struct ipv6_prefix { /* IPv6 prefix recorded on a socket by SO_BINDTOPREFIX */
+ struct in6_addr net; /* prefix address */
+ u_char plen; /* prefix length in bits; the setsockopt path rejects values above 128 */
+};
+
struct sock;
struct proto;
struct net;
@@ -176,6 +186,13 @@ struct sock_common {
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
int skc_bound_dev_if;
+
+ unsigned char skc_bind_to_prefix;
+ union {
+ struct ipv4_prefix skc_bind_prefix4;
+ struct ipv6_prefix skc_bind_prefix6;
+ };
+
union {
struct hlist_node skc_bind_node;
struct hlist_nulls_node skc_portaddr_node;
@@ -327,6 +344,9 @@ struct sock {
#define sk_state __sk_common.skc_state
#define sk_reuse __sk_common.skc_reuse
#define sk_reuseport __sk_common.skc_reuseport
+#define sk_bind_to_prefix __sk_common.skc_bind_to_prefix
+#define sk_bind_prefix4 __sk_common.skc_bind_prefix4
+#define sk_bind_prefix6 __sk_common.skc_bind_prefix6
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index fb8a416..b4dd61f 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -30,6 +30,7 @@
#define SO_SNDLOWAT 19
#define SO_RCVTIMEO 20
#define SO_SNDTIMEO 21
+#define SO_BINDTOPREFIX 22
#endif
/* Security levels - as per NRL IPv6 - don't actually do anything */
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c1c8bc..e4c9c55 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -571,6 +571,68 @@ out:
return ret;
}
+/**
+ * sock_setbindtoprefix - handle setsockopt(SO_BINDTOPREFIX)
+ * @sk: socket to restrict
+ * @optval: user pointer to a struct ipv4_prefix (AF_INET socket) or a
+ *          struct ipv6_prefix (AF_INET6 socket)
+ * @optlen: size of the user buffer; must match the structure size exactly
+ *
+ * Copies the prefix from user space, validates the prefix length, then,
+ * under the socket lock, stores the prefix, sets sk_bind_to_prefix and
+ * calls sk_dst_reset() (presumably so that a cached route is re-evaluated).
+ *
+ * Return: 0 on success, -EINVAL for a wrongly sized buffer or an
+ * out-of-range prefix length, -EFAULT if the buffer cannot be read, and
+ * -ENOPROTOOPT for any other address family.
+ */
+static int sock_setbindtoprefix(struct sock *sk, char __user *optval,
+				int optlen)
+{
+	int ret = -ENOPROTOOPT;
+
+	if (sk->sk_family == AF_INET) {
+		struct ipv4_prefix bind_prefix4;
+
+		/* A wrongly sized option is a bad argument, not a bad address. */
+		ret = -EINVAL;
+		if (optlen != sizeof(struct ipv4_prefix))
+			goto out;
+
+		ret = -EFAULT;
+		if (copy_from_user(&bind_prefix4, optval,
+				   sizeof(struct ipv4_prefix)))
+			goto out;
+
+		ret = -EINVAL;
+		if (bind_prefix4.plen > 32)
+			goto out;
+
+		lock_sock(sk);
+
+		sk->sk_bind_to_prefix = 1;
+		sk->sk_bind_prefix4.net = bind_prefix4.net;
+		sk->sk_bind_prefix4.plen = bind_prefix4.plen;
+		sk_dst_reset(sk);
+
+		release_sock(sk);
+
+		ret = 0;
+	} else if (sk->sk_family == AF_INET6) {
+		struct ipv6_prefix bind_prefix6;
+
+		/* A wrongly sized option is a bad argument, not a bad address. */
+		ret = -EINVAL;
+		if (optlen != sizeof(struct ipv6_prefix))
+			goto out;
+
+		ret = -EFAULT;
+		if (copy_from_user(&bind_prefix6, optval,
+				   sizeof(struct ipv6_prefix)))
+			goto out;
+
+		ret = -EINVAL;
+		if (bind_prefix6.plen > 128)
+			goto out;
+
+		lock_sock(sk);
+
+		sk->sk_bind_to_prefix = 1;
+		memcpy(&sk->sk_bind_prefix6.net, &bind_prefix6.net,
+		       sizeof(struct in6_addr));
+		sk->sk_bind_prefix6.plen = bind_prefix6.plen;
+		sk_dst_reset(sk);
+
+		release_sock(sk);
+
+		ret = 0;
+	}
+
+out:
+	return ret;
+}
+
+
static int sock_getbindtodevice(struct sock *sk, char __user *optval,
int __user *optlen, int len)
{
@@ -611,6 +673,49 @@ out:
return ret;
}
+/*
+ * Handle getsockopt(SO_BINDTOPREFIX).
+ *
+ * When no prefix has been set on @sk, nothing is copied and a length of 0
+ * is written to @optlen.  Otherwise the stored struct ipv4_prefix (AF_INET)
+ * or struct ipv6_prefix (AF_INET6) is copied to @optval and its size is
+ * written to @optlen.
+ *
+ * Returns 0 on success, -EINVAL if @len is smaller than the structure to
+ * copy, and -EFAULT if either user pointer cannot be written.
+ */
+static int sock_getbindtoprefix(struct sock *sk, char __user *optval,
+				int __user *optlen, int len)
+{
+	const void *prefix = NULL;
+	size_t prefix_size = 0;
+
+	if (!sk->sk_bind_to_prefix) {
+		len = 0;
+	} else if (sk->sk_family == AF_INET) {
+		prefix = &sk->sk_bind_prefix4;
+		prefix_size = sizeof(struct ipv4_prefix);
+	} else if (sk->sk_family == AF_INET6) {
+		prefix = &sk->sk_bind_prefix6;
+		prefix_size = sizeof(struct ipv6_prefix);
+	}
+
+	if (prefix) {
+		/* The caller's buffer must be able to hold the whole structure. */
+		if (len < prefix_size)
+			return -EINVAL;
+
+		len = prefix_size;
+
+		if (copy_to_user(optval, prefix, len))
+			return -EFAULT;
+	}
+
+	/* Report how many bytes were actually produced. */
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return 0;
+}
+
+
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
if (valbool)
@@ -659,6 +764,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optname == SO_BINDTODEVICE)
return sock_setbindtodevice(sk, optval, optlen);
+ else if (optname == SO_BINDTOPREFIX)
+ return sock_setbindtoprefix(sk, optval, optlen);
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -1214,6 +1322,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_BINDTODEVICE:
return sock_getbindtodevice(sk, optval, optlen, len);
+ case SO_BINDTOPREFIX:
+ return sock_getbindtoprefix(sk, optval, optlen, len);
+
case SO_GET_FILTER:
len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
if (len < 0)
--
2.7.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [net-next RFC 2/4] bindtoprefix: TCP/IPv4 implementation
2016-03-23 2:26 [net-next RFC 0/4] SO_BINDTOPREFIX Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 1/4] bindtoprefix: infrastructure Gilberto Bertin
@ 2016-03-23 2:26 ` Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 3/4] bindtoprefix: TCP/IPv6 implementation Gilberto Bertin
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Gilberto Bertin @ 2016-03-23 2:26 UTC (permalink / raw)
To: netdev; +Cc: tom, markzzzsmith, Gilberto Bertin
Signed-off-by: Gilberto Bertin <gilberto.bertin@gmail.com>
---
net/ipv4/inet_connection_sock.c | 20 +++++++++++++++++++-
net/ipv4/inet_hashtables.c | 9 +++++++++
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6414891..162c252 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/jhash.h>
+#include <linux/inetdevice.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
@@ -43,6 +44,22 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
+/*
+ * Tell whether the SO_BINDTOPREFIX settings of @sk and @sk2 still allow the
+ * two sockets to conflict on a port.
+ *
+ * Returns 1 when a conflict remains possible and 0 only when both sockets
+ * carry IPv4 prefixes that do not overlap.  The caller ANDs this result into
+ * its conflict test, so the default must be "conflict": a socket without a
+ * prefix is not restricted at all and overlaps with everything.
+ */
+static inline int inet_csk_bind_prefix_conflict(const struct sock *sk,
+						const struct sock *sk2)
+{
+	__be32 mask;
+
+	if (!sk->sk_bind_to_prefix || !sk2->sk_bind_to_prefix)
+		return 1;
+
+	/*
+	 * The prefix is stored in a union selected by address family, so the
+	 * IPv4 member is only meaningful on AF_INET sockets.  Anything else
+	 * keeps the conservative answer.
+	 */
+	if (sk->sk_family != AF_INET || sk2->sk_family != AF_INET)
+		return 1;
+
+	/* Two prefixes overlap iff they agree on the shorter prefix length. */
+	mask = inet_make_mask(min(sk->sk_bind_prefix4.plen,
+				  sk2->sk_bind_prefix4.plen));
+
+	return (sk->sk_bind_prefix4.net & mask) ==
+	       (sk2->sk_bind_prefix4.net & mask);
+}
+
+
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
{
@@ -63,7 +80,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
!inet_v6_ipv6only(sk2) &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+ inet_csk_bind_prefix_conflict(sk, sk2)) {
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ccc5980..44693c4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -13,6 +13,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/inetdevice.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
@@ -189,6 +190,14 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
+ if (sk->sk_bind_to_prefix) {
+ __be32 mask = inet_make_mask(sk->sk_bind_prefix4.plen);
+
+ if ((sk->sk_bind_prefix4.net & mask) != (daddr & mask))
+ return -1;
+ score += 4;
+ }
+
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
--
2.7.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [net-next RFC 3/4] bindtoprefix: TCP/IPv6 implementation
2016-03-23 2:26 [net-next RFC 0/4] SO_BINDTOPREFIX Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 1/4] bindtoprefix: infrastructure Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 2/4] bindtoprefix: TCP/IPv4 implementation Gilberto Bertin
@ 2016-03-23 2:26 ` Gilberto Bertin
2016-03-23 2:26 ` [net-next RFC 4/4] bindtoprefix: UDP implementation Gilberto Bertin
2016-03-29 14:31 ` [net-next RFC 0/4] SO_BINDTOPREFIX Eric Dumazet
4 siblings, 0 replies; 6+ messages in thread
From: Gilberto Bertin @ 2016-03-23 2:26 UTC (permalink / raw)
To: netdev; +Cc: tom, markzzzsmith, Gilberto Bertin
Signed-off-by: Gilberto Bertin <gilberto.bertin@gmail.com>
---
net/ipv6/inet6_connection_sock.c | 17 ++++++++++++++++-
net/ipv6/inet6_hashtables.c | 6 ++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 36c3f01..c65023f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -27,6 +27,20 @@
#include <net/sock.h>
#include <net/inet6_connection_sock.h>
+/*
+ * Tell whether the SO_BINDTOPREFIX settings of @sk and @sk2 still allow the
+ * two sockets to conflict on a port.
+ *
+ * Returns 1 when a conflict remains possible and 0 only when both sockets
+ * carry IPv6 prefixes that do not overlap.  The caller ANDs this result into
+ * its conflict test, so the default must be "conflict": a socket without a
+ * prefix is not restricted at all and overlaps with everything.
+ */
+int inet6_csk_bind_prefix_conflict(const struct sock *sk,
+				   const struct sock *sk2)
+{
+	u_char plen;
+
+	if (!sk->sk_bind_to_prefix || !sk2->sk_bind_to_prefix)
+		return 1;
+
+	/*
+	 * The prefix is stored in a union selected by address family, so the
+	 * IPv6 member is only meaningful on AF_INET6 sockets.  Anything else
+	 * keeps the conservative answer.
+	 */
+	if (sk->sk_family != AF_INET6 || sk2->sk_family != AF_INET6)
+		return 1;
+
+	/* Two prefixes overlap iff they agree on the shorter prefix length. */
+	plen = min(sk->sk_bind_prefix6.plen, sk2->sk_bind_prefix6.plen);
+
+	return ipv6_prefix_equal(&sk->sk_bind_prefix6.net,
+				 &sk2->sk_bind_prefix6.net, plen);
+}
+
+
int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
{
@@ -44,7 +58,8 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if (sk != sk2 &&
(!sk->sk_bound_dev_if ||
!sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+ inet6_csk_bind_prefix_conflict(sk, sk2)) {
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a..bcc16ed 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
+ if (sk->sk_bind_to_prefix) {
+ if (!ipv6_prefix_equal(&sk->sk_bind_prefix6.net, daddr,
+ sk->sk_bind_prefix6.plen))
+ return -1;
+ score++;
+ }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
--
2.7.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [net-next RFC 4/4] bindtoprefix: UDP implementation
2016-03-23 2:26 [net-next RFC 0/4] SO_BINDTOPREFIX Gilberto Bertin
` (2 preceding siblings ...)
2016-03-23 2:26 ` [net-next RFC 3/4] bindtoprefix: TCP/IPv6 implementation Gilberto Bertin
@ 2016-03-23 2:26 ` Gilberto Bertin
2016-03-29 14:31 ` [net-next RFC 0/4] SO_BINDTOPREFIX Eric Dumazet
4 siblings, 0 replies; 6+ messages in thread
From: Gilberto Bertin @ 2016-03-23 2:26 UTC (permalink / raw)
To: netdev; +Cc: tom, markzzzsmith, Gilberto Bertin
Signed-off-by: Gilberto Bertin <gilberto.bertin@gmail.com>
---
net/ipv4/udp.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 95d2f19..31b9687 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -133,6 +133,23 @@ EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
+/*
+ * Tell whether the SO_BINDTOPREFIX settings of @sk and @sk2 still allow the
+ * two sockets to conflict on a port.
+ *
+ * Returns 1 when a conflict remains possible and 0 only when both sockets
+ * carry IPv4 prefixes that do not overlap.  The callers AND this result
+ * into their "port in use" test, so the default must be "conflict": a
+ * socket without a prefix is not restricted at all and overlaps with
+ * everything.
+ */
+static inline int udp_csk_bind_prefix_conflict(const struct sock *sk,
+					       const struct sock *sk2)
+{
+	__be32 mask;
+
+	if (!sk->sk_bind_to_prefix || !sk2->sk_bind_to_prefix)
+		return 1;
+
+	/*
+	 * The prefix is stored in a union selected by address family, so the
+	 * IPv4 member is only meaningful on AF_INET sockets.  Anything else
+	 * keeps the conservative answer.
+	 * NOTE(review): confirm whether non-AF_INET sockets can reach the
+	 * port-in-use checks that call this helper.
+	 */
+	if (sk->sk_family != AF_INET || sk2->sk_family != AF_INET)
+		return 1;
+
+	/* Two prefixes overlap iff they agree on the shorter prefix length. */
+	mask = inet_make_mask(min(sk->sk_bind_prefix4.plen,
+				  sk2->sk_bind_prefix4.plen));
+
+	return (sk->sk_bind_prefix4.net & mask) ==
+	       (sk2->sk_bind_prefix4.net & mask);
+}
+
+
+
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
@@ -153,6 +170,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ udp_csk_bind_prefix_conflict(sk, sk2) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
@@ -189,6 +207,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
(!sk2->sk_reuse || !sk->sk_reuse) &&
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ udp_csk_bind_prefix_conflict(sk, sk2) &&
(!sk2->sk_reuseport || !sk->sk_reuseport ||
rcu_access_pointer(sk->sk_reuseport_cb) ||
!uid_eq(uid, sock_i_uid(sk2))) &&
@@ -426,6 +445,15 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
+
+ if (sk->sk_bind_to_prefix) {
+ __be32 mask = inet_make_mask(sk->sk_bind_prefix4.plen);
+
+ if ((sk->sk_bind_prefix4.net & mask) != (daddr & mask))
+ return -1;
+ score += 4;
+ }
+
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
return score;
@@ -471,6 +499,14 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score += 4;
}
+ if (sk->sk_bind_to_prefix) {
+ __be32 mask = inet_make_mask(sk->sk_bind_prefix4.plen);
+
+ if ((sk->sk_bind_prefix4.net & mask) != (daddr & mask))
+ return -1;
+ score += 4;
+ }
+
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
--
2.7.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [net-next RFC 0/4] SO_BINDTOPREFIX
2016-03-23 2:26 [net-next RFC 0/4] SO_BINDTOPREFIX Gilberto Bertin
` (3 preceding siblings ...)
2016-03-23 2:26 ` [net-next RFC 4/4] bindtoprefix: UDP implementation Gilberto Bertin
@ 2016-03-29 14:31 ` Eric Dumazet
4 siblings, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2016-03-29 14:31 UTC (permalink / raw)
To: Gilberto Bertin; +Cc: netdev, tom, markzzzsmith
On Wed, 2016-03-23 at 02:26 +0000, Gilberto Bertin wrote:
> Since the net-next window just opened, I'm resubmitting my RFC for the
> SO_BINDTOSUBNET patch, following Mark Smith's suggestion to rename the
> whole thing to a clearer SO_BINDTOPREFIX.
Please do not add such monolithic option.
BPF is absolutely the way to go here, as it allows for whatever user
specified tweaks, like a list of destination subnetworks, and/or a list
of source networks, or the date/time of the day, or port knocking without
netfilter, or ... you name it.
Simply add an option to load a BPF filter on a socket, used to vary the
various compute_score() functions.
No hard coded knowledge in the kernel, but a generic interface.
^ permalink raw reply [flat|nested] 6+ messages in thread