* [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets
@ 2020-02-25 13:56 Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets Lorenz Bauer
                   ` (6 more replies)
  0 siblings, 7 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

This series adds support for storing UDP sockets in sockmap and sockhash. This
allows using these maps in reuseport programs on UDP sockets, instead of
reuseport sockarrays. We want to use this in our work on BPF-based
socket dispatch.
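
As an illustration of the intended use (not part of this series), a reuseport
program that steers packets via a sockmap could look roughly like the sketch
below. The map, program and section names are made up for the example:

/* Sketch only: pick the dispatch socket out of a sockmap. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} dispatch_map SEC(".maps");

SEC("sk_reuseport")
int select_sock(struct sk_reuseport_md *reuse_md)
{
	__u32 key = 0;

	/* Steer the packet to the socket stored at key 0, drop on failure. */
	if (bpf_sk_select_reuseport(reuse_md, &dispatch_map, &key, 0))
		return SK_DROP;
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";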

The first two patches make the sockmap code more generic: anything related to
ULP must only be called on TCP sockets, and some of the tcp_bpf hooks can be
re-used for UDP.

The third patch introduces a new struct sk_psock_hooks, which encapsulates some
fiddly state handling required to support IPv6-as-a-module. I'm not
particularly fond of it, and would be happy to hear suggestions on how to make
it less obtrusive.

The fourth patch adds udp_bpf, modeled on tcp_bpf and built on struct
sk_psock_hooks, and relaxes the sockmap update checks.

The final patches enable tests.

Lorenz Bauer (7):
  bpf: sockmap: only check ULP for TCP sockets
  bpf: sockmap: move generic sockmap hooks from BPF TCP
  skmsg: introduce sk_psock_hooks
  bpf: sockmap: allow UDP sockets
  selftests: bpf: don't listen() on UDP sockets
  selftests: bpf: add tests for UDP sockets in sockmap
  selftests: bpf: enable UDP sockmap reuseport tests

 MAINTAINERS                                   |   1 +
 include/linux/bpf.h                           |   4 +-
 include/linux/skmsg.h                         |  72 ++++----
 include/linux/udp.h                           |   4 +
 include/net/tcp.h                             |   1 -
 net/core/skmsg.c                              |  52 ++++++
 net/core/sock_map.c                           | 155 +++++++++++-----
 net/ipv4/Makefile                             |   1 +
 net/ipv4/tcp_bpf.c                            | 169 ++++--------------
 net/ipv4/udp_bpf.c                            |  53 ++++++
 .../bpf/prog_tests/select_reuseport.c         |   7 -
 .../selftests/bpf/prog_tests/sockmap_listen.c | 139 ++++++++------
 12 files changed, 381 insertions(+), 277 deletions(-)
 create mode 100644 net/ipv4/udp_bpf.c

-- 
2.20.1



* [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-25 16:45   ` Song Liu
  2020-02-25 13:56 ` [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP Lorenz Bauer
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

The sock map code checks that a socket does not have an active upper
layer protocol before inserting it into the map. This requires casting
via inet_csk, which isn't valid for UDP sockets.

Guard the ULP checks by testing inet_sk(sk)->is_icsk first.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/linux/skmsg.h |  8 +++++++-
 net/core/sock_map.c   | 11 +++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 112765bd146d..54a9a3e36b29 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -360,7 +360,13 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 					  struct sk_psock *psock)
 {
 	sk->sk_prot->unhash = psock->saved_unhash;
-	tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+	if (inet_sk(sk)->is_icsk) {
+		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+	} else {
+		sk->sk_write_space = psock->saved_write_space;
+		/* Pairs with lockless read in sk_clone_lock() */
+		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+	}
 }
 
 static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 2e0f465295c3..695ecacc7afa 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -94,6 +94,11 @@ static void sock_map_sk_release(struct sock *sk)
 	release_sock(sk);
 }
 
+static bool sock_map_sk_has_ulp(struct sock *sk)
+{
+	return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+}
+
 static void sock_map_add_link(struct sk_psock *psock,
 			      struct sk_psock_link *link,
 			      struct bpf_map *map, void *link_raw)
@@ -384,7 +389,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 				  struct sock *sk, u64 flags)
 {
 	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_psock_link *link;
 	struct sk_psock *psock;
 	struct sock *osk;
@@ -395,7 +399,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 		return -EINVAL;
 	if (unlikely(idx >= map->max_entries))
 		return -E2BIG;
-	if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
+	if (sock_map_sk_has_ulp(sk))
 		return -EINVAL;
 
 	link = sk_psock_init_link();
@@ -738,7 +742,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
 				   struct sock *sk, u64 flags)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 key_size = map->key_size, hash;
 	struct bpf_htab_elem *elem, *elem_new;
 	struct bpf_htab_bucket *bucket;
@@ -749,7 +752,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	if (unlikely(flags > BPF_EXIST))
 		return -EINVAL;
-	if (unlikely(icsk->icsk_ulp_data))
+	if (sock_map_sk_has_ulp(sk))
 		return -EINVAL;
 
 	link = sk_psock_init_link();
-- 
2.20.1



* [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-25 17:22   ` Song Liu
  2020-02-25 22:15   ` kbuild test robot
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

The close, unhash and clone handlers from TCP sockmap are actually generic
and can be reused by UDP sockmap. Move the helpers into the sockmap codebase
and expose them.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/linux/bpf.h   |  4 ++-
 include/linux/skmsg.h | 28 ----------------
 net/core/sock_map.c   | 77 +++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_bpf.c    | 55 ++-----------------------------
 4 files changed, 79 insertions(+), 85 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70e12c8..8422ab6a63d8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1355,6 +1355,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 #if defined(CONFIG_BPF_STREAM_PARSER)
 int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+void sock_map_unhash(struct sock *sk);
+void sock_map_close(struct sock *sk, long timeout);
 #else
 static inline int sock_map_prog_update(struct bpf_map *map,
 				       struct bpf_prog *prog, u32 which)
@@ -1367,7 +1369,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 {
 	return -EINVAL;
 }
-#endif
+#endif /* CONFIG_BPF_STREAM_PARSER */
 
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 54a9a3e36b29..c881094387db 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
 }
 
 struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-#if defined(CONFIG_BPF_STREAM_PARSER)
-void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
-#else
-static inline void sk_psock_unlink(struct sock *sk,
-				   struct sk_psock_link *link)
-{
-}
-#endif
 
 void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
 
@@ -387,26 +379,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
 	return test_bit(bit, &psock->state);
 }
 
-static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
-{
-	struct sk_psock *psock;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (psock) {
-		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
-			psock = ERR_PTR(-EBUSY);
-			goto out;
-		}
-
-		if (!refcount_inc_not_zero(&psock->refcnt))
-			psock = ERR_PTR(-EBUSY);
-	}
-out:
-	rcu_read_unlock();
-	return psock;
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 695ecacc7afa..459b3ba16023 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -146,6 +146,26 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
 	}
 }
 
+static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock) {
+		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
+			psock = ERR_PTR(-EBUSY);
+			goto out;
+		}
+
+		if (!refcount_inc_not_zero(&psock->refcnt))
+			psock = ERR_PTR(-EBUSY);
+	}
+out:
+	rcu_read_unlock();
+	return psock;
+}
+
 static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			 struct sock *sk)
 {
@@ -177,7 +197,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		}
 	}
 
-	psock = sk_psock_get_checked(sk);
+	psock = sock_map_psock_get_checked(sk);
 	if (IS_ERR(psock)) {
 		ret = PTR_ERR(psock);
 		goto out_progs;
@@ -240,7 +260,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
 	struct sk_psock *psock;
 	int ret;
 
-	psock = sk_psock_get_checked(sk);
+	psock = sock_map_psock_get_checked(sk);
 	if (IS_ERR(psock))
 		return PTR_ERR(psock);
 
@@ -1132,7 +1152,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	return 0;
 }
 
-void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
 	switch (link->map->map_type) {
 	case BPF_MAP_TYPE_SOCKMAP:
@@ -1145,3 +1165,54 @@ void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
 		break;
 	}
 }
+
+static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	while ((link = sk_psock_link_pop(psock))) {
+		sock_map_unlink(sk, link);
+		sk_psock_free_link(link);
+	}
+}
+
+void sock_map_unhash(struct sock *sk)
+{
+	void (*saved_unhash)(struct sock *sk);
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+		return;
+	}
+
+	saved_unhash = psock->saved_unhash;
+	sock_map_remove_links(sk, psock);
+	rcu_read_unlock();
+	saved_unhash(sk);
+}
+
+void sock_map_close(struct sock *sk, long timeout)
+{
+	void (*saved_close)(struct sock *sk, long timeout);
+	struct sk_psock *psock;
+
+	lock_sock(sk);
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		release_sock(sk);
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	saved_close = psock->saved_close;
+	sock_map_remove_links(sk, psock);
+	rcu_read_unlock();
+	release_sock(sk);
+	saved_close(sk, timeout);
+}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 7d6e1b75d4d4..90955c96a9a8 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -528,57 +528,6 @@ static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
 	return copied ? copied : err;
 }
 
-static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
-{
-	struct sk_psock_link *link;
-
-	while ((link = sk_psock_link_pop(psock))) {
-		sk_psock_unlink(sk, link);
-		sk_psock_free_link(link);
-	}
-}
-
-static void tcp_bpf_unhash(struct sock *sk)
-{
-	void (*saved_unhash)(struct sock *sk);
-	struct sk_psock *psock;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		if (sk->sk_prot->unhash)
-			sk->sk_prot->unhash(sk);
-		return;
-	}
-
-	saved_unhash = psock->saved_unhash;
-	tcp_bpf_remove(sk, psock);
-	rcu_read_unlock();
-	saved_unhash(sk);
-}
-
-static void tcp_bpf_close(struct sock *sk, long timeout)
-{
-	void (*saved_close)(struct sock *sk, long timeout);
-	struct sk_psock *psock;
-
-	lock_sock(sk);
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		release_sock(sk);
-		return sk->sk_prot->close(sk, timeout);
-	}
-
-	saved_close = psock->saved_close;
-	tcp_bpf_remove(sk, psock);
-	rcu_read_unlock();
-	release_sock(sk);
-	saved_close(sk, timeout);
-}
-
 enum {
 	TCP_BPF_IPV4,
 	TCP_BPF_IPV6,
@@ -599,8 +548,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 				   struct proto *base)
 {
 	prot[TCP_BPF_BASE]			= *base;
-	prot[TCP_BPF_BASE].unhash		= tcp_bpf_unhash;
-	prot[TCP_BPF_BASE].close		= tcp_bpf_close;
+	prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
+	prot[TCP_BPF_BASE].close		= sock_map_close;
 	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
 	prot[TCP_BPF_BASE].stream_memory_read	= tcp_bpf_stream_read;
 
-- 
2.20.1



* [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-26 14:57   ` Jakub Sitnicki
                     ` (3 more replies)
  2020-02-25 13:56 ` [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets Lorenz Bauer
                   ` (3 subsequent siblings)
  6 siblings, 4 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

The sockmap works by overriding some of the callbacks in sk->sk_prot, while
leaving others untouched. This means that we need access to the struct proto
for any protocol we want to support. For IPv4 this is trivial, since both
TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
module, so the existing TCP sockmap hooks use some trickery to lazily
initialize the modified struct proto for TCPv6.

Pull this logic into a standalone struct sk_psock_hooks, so that it can be
re-used by UDP sockmap.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 include/linux/skmsg.h |  36 ++++++++-----
 include/net/tcp.h     |   1 -
 net/core/skmsg.c      |  52 +++++++++++++++++++
 net/core/sock_map.c   |  24 ++++-----
 net/ipv4/tcp_bpf.c    | 114 ++++++++++++------------------------------
 5 files changed, 116 insertions(+), 111 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c881094387db..70d65ab10b5c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -109,6 +109,16 @@ struct sk_psock {
 	};
 };
 
+struct sk_psock_hooks {
+	struct proto *base_ipv6;
+	struct proto *ipv4;
+	struct proto *ipv6;
+	spinlock_t ipv6_lock;
+	int (*rebuild_proto)(struct proto prot[], struct proto *base);
+	struct proto *(*choose_proto)(struct proto prot[],
+				      struct sk_psock *psock);
+};
+
 int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
 		 int elem_first_coalesce);
 int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
@@ -335,23 +345,14 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
 	}
 }
 
-static inline void sk_psock_update_proto(struct sock *sk,
-					 struct sk_psock *psock,
-					 struct proto *ops)
-{
-	psock->saved_unhash = sk->sk_prot->unhash;
-	psock->saved_close = sk->sk_prot->close;
-	psock->saved_write_space = sk->sk_write_space;
-
-	psock->sk_proto = sk->sk_prot;
-	/* Pairs with lockless read in sk_clone_lock() */
-	WRITE_ONCE(sk->sk_prot, ops);
-}
-
 static inline void sk_psock_restore_proto(struct sock *sk,
 					  struct sk_psock *psock)
 {
+	if (!psock->sk_proto)
+		return;
+
 	sk->sk_prot->unhash = psock->saved_unhash;
+
 	if (inet_sk(sk)->is_icsk) {
 		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
 	} else {
@@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
 	psock_set_prog(&progs->skb_verdict, NULL);
 }
 
+static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
+				       struct proto *ipv4_base)
+{
+	hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED();
+	return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
+}
+
+int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);
+
 #endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 07f947cc80e6..ccf39d80b695 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2196,7 +2196,6 @@ struct sk_msg;
 struct sk_psock;
 
 int tcp_bpf_init(struct sock *sk);
-void tcp_bpf_reinit(struct sock *sk);
 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 			  int flags);
 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index eeb28cb85664..a9bdf02c2539 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -844,3 +844,55 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 	strp_stop(&parser->strp);
 	parser->enabled = false;
 }
+
+static inline int sk_psock_hooks_init_ipv6(struct sk_psock_hooks *hooks,
+					    struct proto *base)
+{
+	int ret = 0;
+
+	if (likely(base == smp_load_acquire(&hooks->base_ipv6)))
+		return 0;
+
+	spin_lock_bh(&hooks->ipv6_lock);
+	if (likely(base != hooks->base_ipv6)) {
+		ret = hooks->rebuild_proto(hooks->ipv6, base);
+		if (!ret)
+			smp_store_release(&hooks->base_ipv6, base);
+	}
+	spin_unlock_bh(&hooks->ipv6_lock);
+	return ret;
+}
+
+int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk)
+{
+	struct sk_psock *psock = sk_psock(sk);
+	struct proto *prot_base;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(!psock))
+		return -EINVAL;
+
+	/* Initialize saved callbacks and original proto only once.
+	 * Since we've not installed the hooks, psock is not yet in use and
+	 * we can initialize it without synchronization.
+	 */
+	if (!psock->sk_proto) {
+		struct proto *prot = READ_ONCE(sk->sk_prot);
+
+		if (sk->sk_family == AF_INET6 &&
+		    sk_psock_hooks_init_ipv6(hooks, prot))
+			return -EINVAL;
+
+		psock->saved_unhash = prot->unhash;
+		psock->saved_close = prot->close;
+		psock->saved_write_space = sk->sk_write_space;
+
+		psock->sk_proto = prot;
+	}
+
+	/* Pairs with lockless read in sk_clone_lock() */
+	prot_base = sk->sk_family == AF_INET ? hooks->ipv4 : hooks->ipv6;
+	WRITE_ONCE(sk->sk_prot, hooks->choose_proto(prot_base, psock));
+	return 0;
+}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 459b3ba16023..c84cc9fc7f6b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -170,8 +170,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			 struct sock *sk)
 {
 	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
-	bool skb_progs, sk_psock_is_new = false;
 	struct sk_psock *psock;
+	bool skb_progs;
 	int ret;
 
 	skb_verdict = READ_ONCE(progs->skb_verdict);
@@ -206,9 +206,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 	if (psock) {
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
 		    (skb_progs  && READ_ONCE(psock->progs.skb_parser))) {
-			sk_psock_put(sk, psock);
 			ret = -EBUSY;
-			goto out_progs;
+			goto out_drop;
 		}
 	} else {
 		psock = sk_psock_init(sk, map->numa_node);
@@ -216,18 +215,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			ret = -ENOMEM;
 			goto out_progs;
 		}
-		sk_psock_is_new = true;
 	}
 
 	if (msg_parser)
 		psock_set_prog(&psock->progs.msg_parser, msg_parser);
-	if (sk_psock_is_new) {
-		ret = tcp_bpf_init(sk);
-		if (ret < 0)
-			goto out_drop;
-	} else {
-		tcp_bpf_reinit(sk);
-	}
+
+	ret = tcp_bpf_init(sk);
+	if (ret < 0)
+		goto out_drop;
 
 	write_lock_bh(&sk->sk_callback_lock);
 	if (skb_progs && !psock->parser.enabled) {
@@ -264,15 +259,14 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
 	if (IS_ERR(psock))
 		return PTR_ERR(psock);
 
-	if (psock) {
-		tcp_bpf_reinit(sk);
-		return 0;
-	}
+	if (psock)
+		goto init;
 
 	psock = sk_psock_init(sk, map->numa_node);
 	if (!psock)
 		return -ENOMEM;
 
+init:
 	ret = tcp_bpf_init(sk);
 	if (ret < 0)
 		sk_psock_put(sk, psock);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 90955c96a9a8..81c0431a8dbd 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -528,25 +528,23 @@ static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
 	return copied ? copied : err;
 }
 
-enum {
-	TCP_BPF_IPV4,
-	TCP_BPF_IPV6,
-	TCP_BPF_NUM_PROTS,
-};
-
 enum {
 	TCP_BPF_BASE,
 	TCP_BPF_TX,
 	TCP_BPF_NUM_CFGS,
 };
 
-static struct proto *tcpv6_prot_saved __read_mostly;
-static DEFINE_SPINLOCK(tcpv6_prot_lock);
-static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];
-
-static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
-				   struct proto *base)
+static int tcp_bpf_rebuild_proto(struct proto prot[], struct proto *base)
 {
+	/* In order to avoid retpoline, we make assumptions when we call
+	 * into ops if e.g. a psock is not present. Make sure they are
+	 * indeed valid assumptions.
+	 */
+	if (base->recvmsg  != tcp_recvmsg ||
+	    base->sendmsg  != tcp_sendmsg ||
+	    base->sendpage != tcp_sendpage)
+		return -ENOTSUPP;
+
 	prot[TCP_BPF_BASE]			= *base;
 	prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
 	prot[TCP_BPF_BASE].close		= sock_map_close;
@@ -556,91 +554,42 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
 	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
 	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
-}
-
-static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
-{
-	if (sk->sk_family == AF_INET6 &&
-	    unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
-		spin_lock_bh(&tcpv6_prot_lock);
-		if (likely(ops != tcpv6_prot_saved)) {
-			tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
-			smp_store_release(&tcpv6_prot_saved, ops);
-		}
-		spin_unlock_bh(&tcpv6_prot_lock);
-	}
-}
-
-static int __init tcp_bpf_v4_build_proto(void)
-{
-	tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
 	return 0;
 }
-core_initcall(tcp_bpf_v4_build_proto);
 
-static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
+static struct proto *tcp_bpf_choose_proto(struct proto prot[],
+					  struct sk_psock *psock)
 {
-	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
-	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+	int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
 
-	sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
+	return &prot[config];
 }
 
-static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
-	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
-	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
-
-	/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
-	 * or added requiring sk_prot hook updates. We keep original saved
-	 * hooks in this case.
-	 *
-	 * Pairs with lockless read in sk_clone_lock().
-	 */
-	WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
-}
-
-static int tcp_bpf_assert_proto_ops(struct proto *ops)
-{
-	/* In order to avoid retpoline, we make assumptions when we call
-	 * into ops if e.g. a psock is not present. Make sure they are
-	 * indeed valid assumptions.
-	 */
-	return ops->recvmsg  == tcp_recvmsg &&
-	       ops->sendmsg  == tcp_sendmsg &&
-	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
-}
+static struct proto tcp_bpf_ipv4[TCP_BPF_NUM_CFGS];
+static struct proto tcp_bpf_ipv6[TCP_BPF_NUM_CFGS];
+static struct sk_psock_hooks tcp_bpf_hooks __read_mostly = {
+	.ipv4 = &tcp_bpf_ipv4[0],
+	.ipv6 = &tcp_bpf_ipv6[0],
+	.rebuild_proto = tcp_bpf_rebuild_proto,
+	.choose_proto = tcp_bpf_choose_proto,
+};
 
-void tcp_bpf_reinit(struct sock *sk)
+static int __init tcp_bpf_init_psock_hooks(void)
 {
-	struct sk_psock *psock;
-
-	sock_owned_by_me(sk);
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	tcp_bpf_reinit_sk_prot(sk, psock);
-	rcu_read_unlock();
+	return sk_psock_hooks_init(&tcp_bpf_hooks, &tcp_prot);
 }
+core_initcall(tcp_bpf_init_psock_hooks);
 
 int tcp_bpf_init(struct sock *sk)
 {
-	struct proto *ops = READ_ONCE(sk->sk_prot);
-	struct sk_psock *psock;
+	int ret;
 
 	sock_owned_by_me(sk);
 
 	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (unlikely(!psock || psock->sk_proto ||
-		     tcp_bpf_assert_proto_ops(ops))) {
-		rcu_read_unlock();
-		return -EINVAL;
-	}
-	tcp_bpf_check_v6_needs_rebuild(sk, ops);
-	tcp_bpf_update_sk_prot(sk, psock);
+	ret = sk_psock_hooks_install(&tcp_bpf_hooks, sk);
 	rcu_read_unlock();
-	return 0;
+	return ret;
 }
 
 /* If a child got cloned from a listening socket that had tcp_bpf
@@ -650,9 +599,10 @@ int tcp_bpf_init(struct sock *sk)
  */
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 {
-	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
-	struct proto *prot = newsk->sk_prot;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
 
-	if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+	/* TCP_LISTEN can only use TCP_BPF_BASE, so this is safe */
+	if (unlikely(prot == &tcp_bpf_ipv4[TCP_BPF_BASE] ||
+	             prot == &tcp_bpf_ipv6[TCP_BPF_BASE]))
 		newsk->sk_prot = sk->sk_prot_creator;
 }
-- 
2.20.1



* [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
                   ` (2 preceding siblings ...)
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-25 23:36   ` kbuild test robot
  2020-02-26 18:47   ` Martin KaFai Lau
  2020-02-25 13:56 ` [PATCH bpf-next 5/7] selftests: bpf: don't listen() on " Lorenz Bauer
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

Add basic psock hooks for UDP sockets. This allows adding and
removing sockets, as well as automatic removal on unhash and close.
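
As a rough illustration (not part of the patch), adding a bound UDP socket to
a sockmap from user space could then look like the sketch below; the map_fd
parameter and helper name are made up, and error handling is trimmed:

/* Sketch only: map_fd is assumed to refer to an existing
 * BPF_MAP_TYPE_SOCKMAP created elsewhere.
 */
#include <arpa/inet.h>
#include <sys/socket.h>
#include <bpf/bpf.h>

static int add_udp_sock(int map_fd)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	__u32 key = 0;
	__u64 value;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -1;

	/* The socket must be hashed, i.e. bound, before the map accepts it. */
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return -1;

	value = s;
	return bpf_map_update_elem(map_fd, &key, &value, BPF_ANY);
}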

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 MAINTAINERS         |  1 +
 include/linux/udp.h |  4 ++++
 net/core/sock_map.c | 47 +++++++++++++++++++++++-----------------
 net/ipv4/Makefile   |  1 +
 net/ipv4/udp_bpf.c  | 53 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 20 deletions(-)
 create mode 100644 net/ipv4/udp_bpf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 2af5fa73155e..495ba52038ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9358,6 +9358,7 @@ F:	include/linux/skmsg.h
 F:	net/core/skmsg.c
 F:	net/core/sock_map.c
 F:	net/ipv4/tcp_bpf.c
+F:	net/ipv4/udp_bpf.c
 
 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
diff --git a/include/linux/udp.h b/include/linux/udp.h
index aa84597bdc33..d90d8fd5f73d 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -143,4 +143,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 
 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
 
+#if defined(CONFIG_NET_SOCK_MSG)
+int udp_bpf_init(struct sock *sk);
+#endif
+
 #endif	/* _LINUX_UDP_H */
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index c84cc9fc7f6b..f998192c425f 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -153,7 +153,7 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (psock) {
-		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
+		if (sk->sk_prot->close != sock_map_close) {
 			psock = ERR_PTR(-EBUSY);
 			goto out;
 		}
@@ -166,6 +166,14 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
 	return psock;
 }
 
+static int sock_map_init_hooks(struct sock *sk)
+{
+	if (sk->sk_type == SOCK_DGRAM)
+		return udp_bpf_init(sk);
+	else
+		return tcp_bpf_init(sk);
+}
+
 static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			 struct sock *sk)
 {
@@ -220,7 +228,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 	if (msg_parser)
 		psock_set_prog(&psock->progs.msg_parser, msg_parser);
 
-	ret = tcp_bpf_init(sk);
+	ret = sock_map_init_hooks(sk);
 	if (ret < 0)
 		goto out_drop;
 
@@ -267,7 +275,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
 		return -ENOMEM;
 
 init:
-	ret = tcp_bpf_init(sk);
+	ret = sock_map_init_hooks(sk);
 	if (ret < 0)
 		sk_psock_put(sk, psock);
 	return ret;
@@ -394,9 +402,14 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
 	return 0;
 }
 
+static bool sock_map_sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+}
+
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
-	return sk->sk_state != TCP_LISTEN;
+	return sock_map_sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
 }
 
 static int sock_map_update_common(struct bpf_map *map, u32 idx,
@@ -466,15 +479,17 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
 	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
+static bool sock_map_sk_is_udp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP;
+}
+
 static bool sock_map_sk_is_suitable(const struct sock *sk)
 {
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
+	const int tcp_flags = TCPF_ESTABLISHED | TCPF_LISTEN | TCPF_SYN_RECV;
 
-static bool sock_map_sk_state_allowed(const struct sock *sk)
-{
-	return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+	return (sock_map_sk_is_udp(sk) && sk_hashed(sk)) ||
+	       (sock_map_sk_is_tcp(sk) && (1 << sk->sk_state) & tcp_flags);
 }
 
 static int sock_map_update_elem(struct bpf_map *map, void *key,
@@ -501,13 +516,9 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
 
 	sock_map_sk_acquire(sk);
-	if (!sock_map_sk_state_allowed(sk))
+	if (!sock_map_sk_is_suitable(sk))
 		ret = -EOPNOTSUPP;
 	else
 		ret = sock_map_update_common(map, idx, sk, flags);
@@ -849,13 +860,9 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
 
 	sock_map_sk_acquire(sk);
-	if (!sock_map_sk_state_allowed(sk))
+	if (!sock_map_sk_is_suitable(sk))
 		ret = -EOPNOTSUPP;
 	else
 		ret = sock_hash_update_common(map, key, sk, flags);
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9d97bace13c8..48cc05d365e4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
+obj-$(CONFIG_NET_SOCK_MSG) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
new file mode 100644
index 000000000000..e085a0648a94
--- /dev/null
+++ b/net/ipv4/udp_bpf.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/init.h>
+#include <linux/skmsg.h>
+#include <linux/wait.h>
+#include <net/udp.h>
+
+#include <net/inet_common.h>
+
+static int udp_bpf_rebuild_protos(struct proto *prot, struct proto *base)
+{
+	*prot        = *base;
+	prot->unhash = sock_map_unhash;
+	prot->close  = sock_map_close;
+	return 0;
+}
+
+static struct proto *udp_bpf_choose_proto(struct proto prot[],
+					  struct sk_psock *psock)
+{
+	return prot;
+}
+
+static struct proto udpv4_proto;
+static struct proto udpv6_proto;
+
+static struct sk_psock_hooks udp_psock_proto __read_mostly = {
+	.ipv4 = &udpv4_proto,
+	.ipv6 = &udpv6_proto,
+	.rebuild_proto = udp_bpf_rebuild_protos,
+	.choose_proto = udp_bpf_choose_proto,
+};
+
+static int __init udp_bpf_init_psock_hooks(void)
+{
+	return sk_psock_hooks_init(&udp_psock_proto, &udp_prot);
+}
+core_initcall(udp_bpf_init_psock_hooks);
+
+int udp_bpf_init(struct sock *sk)
+{
+	int ret;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	ret = sk_psock_hooks_install(&udp_psock_proto, sk);
+	rcu_read_unlock();
+	return ret;
+}
-- 
2.20.1



* [PATCH bpf-next 5/7] selftests: bpf: don't listen() on UDP sockets
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
                   ` (3 preceding siblings ...)
  2020-02-25 13:56 ` [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap Lorenz Bauer
  2020-02-25 13:56 ` [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests Lorenz Bauer
  6 siblings, 0 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

Most tests for TCP sockmap can be adapted to UDP sockmap if the
listen call is skipped. Rename listen_loopback, etc. to socket_loopback
and skip listen() for SOCK_DGRAM.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 .../selftests/bpf/prog_tests/sockmap_listen.c | 47 ++++++++++---------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index b1b2acea0638..4ba41dd26d6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -230,7 +230,7 @@ static int enable_reuseport(int s, int progfd)
 	return 0;
 }
 
-static int listen_loopback_reuseport(int family, int sotype, int progfd)
+static int socket_loopback_reuseport(int family, int sotype, int progfd)
 {
 	struct sockaddr_storage addr;
 	socklen_t len;
@@ -249,6 +249,9 @@ static int listen_loopback_reuseport(int family, int sotype, int progfd)
 	if (err)
 		goto close;
 
+	if (sotype == SOCK_DGRAM)
+		return s;
+
 	err = xlisten(s, SOMAXCONN);
 	if (err)
 		goto close;
@@ -259,9 +262,9 @@ static int listen_loopback_reuseport(int family, int sotype, int progfd)
 	return -1;
 }
 
-static int listen_loopback(int family, int sotype)
+static int socket_loopback(int family, int sotype)
 {
-	return listen_loopback_reuseport(family, sotype, -1);
+	return socket_loopback_reuseport(family, sotype, -1);
 }
 
 static void test_insert_invalid(int family, int sotype, int mapfd)
@@ -333,7 +336,7 @@ static void test_insert_listening(int family, int sotype, int mapfd)
 	u32 key;
 	int s;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -349,7 +352,7 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
 	u32 key;
 	int s;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -366,7 +369,7 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
 	u64 value;
 	u32 key;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -390,7 +393,7 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
 	u32 key;
 	int s;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -417,7 +420,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
 	u64 value;
 	u32 key;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -439,7 +442,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
 	u32 key, value32;
 	int err, s;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -470,11 +473,11 @@ static void test_update_listening(int family, int sotype, int mapfd)
 	u64 value;
 	u32 key;
 
-	s1 = listen_loopback(family, sotype);
+	s1 = socket_loopback(family, sotype);
 	if (s1 < 0)
 		return;
 
-	s2 = listen_loopback(family, sotype);
+	s2 = socket_loopback(family, sotype);
 	if (s2 < 0)
 		goto close_s1;
 
@@ -500,7 +503,7 @@ static void test_destroy_orphan_child(int family, int sotype, int mapfd)
 	u64 value;
 	u32 key;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -534,7 +537,7 @@ static void test_clone_after_delete(int family, int sotype, int mapfd)
 	u64 value;
 	u32 key;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s < 0)
 		return;
 
@@ -570,7 +573,7 @@ static void test_accept_after_delete(int family, int sotype, int mapfd)
 	socklen_t len;
 	u64 value;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s == -1)
 		return;
 
@@ -624,7 +627,7 @@ static void test_accept_before_delete(int family, int sotype, int mapfd)
 	socklen_t len;
 	u64 value;
 
-	s = listen_loopback(family, sotype);
+	s = socket_loopback(family, sotype);
 	if (s == -1)
 		return;
 
@@ -735,7 +738,7 @@ static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
 	int err, s;
 	u64 value;
 
-	s = listen_loopback(family, sotype | SOCK_NONBLOCK);
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 	if (s < 0)
 		return;
 
@@ -877,7 +880,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 
 	zero_verdict_count(verd_mapfd);
 
-	s = listen_loopback(family, sotype | SOCK_NONBLOCK);
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 	if (s < 0)
 		return;
 
@@ -1009,7 +1012,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
 
 	zero_verdict_count(verd_mapfd);
 
-	s = listen_loopback(family, sotype | SOCK_NONBLOCK);
+	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 	if (s < 0)
 		return;
 
@@ -1120,7 +1123,7 @@ static void test_reuseport_select_listening(int family, int sotype,
 
 	zero_verdict_count(verd_map);
 
-	s = listen_loopback_reuseport(family, sotype, reuseport_prog);
+	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
 	if (s < 0)
 		return;
 
@@ -1174,7 +1177,7 @@ static void test_reuseport_select_connected(int family, int sotype,
 
 	zero_verdict_count(verd_map);
 
-	s = listen_loopback_reuseport(family, sotype, reuseport_prog);
+	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
 	if (s < 0)
 		return;
 
@@ -1249,11 +1252,11 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
 	zero_verdict_count(verd_map);
 
 	/* Create two listeners, each in its own reuseport group */
-	s1 = listen_loopback_reuseport(family, sotype, reuseport_prog);
+	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
 	if (s1 < 0)
 		return;
 
-	s2 = listen_loopback_reuseport(family, sotype, reuseport_prog);
+	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
 	if (s2 < 0)
 		goto close_srv1;
 
-- 
2.20.1



* [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
                   ` (4 preceding siblings ...)
  2020-02-25 13:56 ` [PATCH bpf-next 5/7] selftests: bpf: don't listen() on " Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-27 11:49   ` Jakub Sitnicki
  2020-02-25 13:56 ` [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests Lorenz Bauer
  6 siblings, 1 reply; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

Expand the TCP sockmap test suite to also check UDP sockets.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 .../selftests/bpf/prog_tests/sockmap_listen.c | 92 +++++++++++++------
 1 file changed, 63 insertions(+), 29 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ba41dd26d6b..72e578a5a5d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -330,7 +330,7 @@ static void test_insert_bound(int family, int sotype, int mapfd)
 	xclose(s);
 }
 
-static void test_insert_listening(int family, int sotype, int mapfd)
+static void test_insert(int family, int sotype, int mapfd)
 {
 	u64 value;
 	u32 key;
@@ -467,7 +467,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
 	xclose(s);
 }
 
-static void test_update_listening(int family, int sotype, int mapfd)
+static void test_update_existing(int family, int sotype, int mapfd)
 {
 	int s1, s2;
 	u64 value;
@@ -1302,11 +1302,15 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
 	xclose(s1);
 }
 
-#define TEST(fn)                                                               \
+#define TEST_SOTYPE(fn, sotype)                                                \
 	{                                                                      \
-		fn, #fn                                                        \
+		fn, #fn, sotype                                                \
 	}
 
+#define TEST(fn) TEST_SOTYPE(fn, 0)
+#define TEST_STREAM(fn) TEST_SOTYPE(fn, SOCK_STREAM)
+#define TEST_DGRAM(fn) TEST_SOTYPE(fn, SOCK_DGRAM)
+
 static void test_ops_cleanup(const struct bpf_map *map)
 {
 	const struct bpf_map_def *def;
@@ -1353,18 +1357,31 @@ static const char *map_type_str(const struct bpf_map *map)
 	}
 }
 
+static const char *sotype_str(int sotype)
+{
+	switch (sotype) {
+	case SOCK_DGRAM:
+		return "UDP";
+	case SOCK_STREAM:
+		return "TCP";
+	default:
+		return "unknown";
+	}
+}
+
 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
 		     int family, int sotype)
 {
 	const struct op_test {
 		void (*fn)(int family, int sotype, int mapfd);
 		const char *name;
+		int sotype;
 	} tests[] = {
 		/* insert */
 		TEST(test_insert_invalid),
 		TEST(test_insert_opened),
-		TEST(test_insert_bound),
-		TEST(test_insert_listening),
+		TEST_STREAM(test_insert_bound),
+		TEST(test_insert),
 		/* delete */
 		TEST(test_delete_after_insert),
 		TEST(test_delete_after_close),
@@ -1373,28 +1390,33 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
 		TEST(test_lookup_after_delete),
 		TEST(test_lookup_32_bit_value),
 		/* update */
-		TEST(test_update_listening),
+		TEST(test_update_existing),
 		/* races with insert/delete */
-		TEST(test_destroy_orphan_child),
-		TEST(test_syn_recv_insert_delete),
-		TEST(test_race_insert_listen),
+		TEST_STREAM(test_destroy_orphan_child),
+		TEST_STREAM(test_syn_recv_insert_delete),
+		TEST_STREAM(test_race_insert_listen),
 		/* child clone */
-		TEST(test_clone_after_delete),
-		TEST(test_accept_after_delete),
-		TEST(test_accept_before_delete),
+		TEST_STREAM(test_clone_after_delete),
+		TEST_STREAM(test_accept_after_delete),
+		TEST_STREAM(test_accept_before_delete),
 	};
-	const char *family_name, *map_name;
+	const char *family_name, *map_name, *sotype_name;
 	const struct op_test *t;
 	char s[MAX_TEST_NAME];
 	int map_fd;
 
 	family_name = family_str(family);
 	map_name = map_type_str(map);
+	sotype_name = sotype_str(sotype);
 	map_fd = bpf_map__fd(map);
 
+
 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
-		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
-			 t->name);
+		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+			 sotype_name, t->name);
+
+		if (t->sotype != 0 && t->sotype != sotype)
+			continue;
 
 		if (!test__start_subtest(s))
 			continue;
@@ -1411,22 +1433,28 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
 		void (*fn)(struct test_sockmap_listen *skel,
 			   struct bpf_map *map, int family, int sotype);
 		const char *name;
+		int sotype;
 	} tests[] = {
-		TEST(test_skb_redir_to_connected),
-		TEST(test_skb_redir_to_listening),
-		TEST(test_msg_redir_to_connected),
-		TEST(test_msg_redir_to_listening),
+		TEST_STREAM(test_skb_redir_to_connected),
+		TEST_STREAM(test_skb_redir_to_listening),
+		TEST_STREAM(test_msg_redir_to_connected),
+		TEST_STREAM(test_msg_redir_to_listening),
 	};
-	const char *family_name, *map_name;
+	const char *family_name, *map_name, *sotype_name;
 	const struct redir_test *t;
 	char s[MAX_TEST_NAME];
 
 	family_name = family_str(family);
 	map_name = map_type_str(map);
+	sotype_name = sotype_str(sotype);
 
 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
-		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
-			 t->name);
+		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+			 sotype_name, t->name);
+
+		if (t->sotype != 0 && t->sotype != sotype)
+			continue;
+
 		if (!test__start_subtest(s))
 			continue;
 
@@ -1441,26 +1469,31 @@ static void test_reuseport(struct test_sockmap_listen *skel,
 		void (*fn)(int family, int sotype, int socket_map,
 			   int verdict_map, int reuseport_prog);
 		const char *name;
+		int sotype;
 	} tests[] = {
-		TEST(test_reuseport_select_listening),
-		TEST(test_reuseport_select_connected),
-		TEST(test_reuseport_mixed_groups),
+		TEST_STREAM(test_reuseport_select_listening),
+		TEST_STREAM(test_reuseport_select_connected),
+		TEST_STREAM(test_reuseport_mixed_groups),
 	};
 	int socket_map, verdict_map, reuseport_prog;
-	const char *family_name, *map_name;
+	const char *family_name, *map_name, *sotype_name;
 	const struct reuseport_test *t;
 	char s[MAX_TEST_NAME];
 
 	family_name = family_str(family);
 	map_name = map_type_str(map);
+	sotype_name = sotype_str(sotype);
 
 	socket_map = bpf_map__fd(map);
 	verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
 
 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
-		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
-			 t->name);
+		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+			 sotype_name, t->name);
+
+		if (t->sotype != 0 && t->sotype != sotype)
+			continue;
 
 		if (!test__start_subtest(s))
 			continue;
@@ -1473,6 +1506,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 		      int family)
 {
 	test_ops(skel, map, family, SOCK_STREAM);
+	test_ops(skel, map, family, SOCK_DGRAM);
 	test_redir(skel, map, family, SOCK_STREAM);
 	test_reuseport(skel, map, family, SOCK_STREAM);
 }
-- 
2.20.1



* [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests
  2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
                   ` (5 preceding siblings ...)
  2020-02-25 13:56 ` [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap Lorenz Bauer
@ 2020-02-25 13:56 ` Lorenz Bauer
  2020-02-26 13:12   ` Jakub Sitnicki
  6 siblings, 1 reply; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-25 13:56 UTC (permalink / raw)
  To: ast, daniel, john.fastabend; +Cc: netdev, bpf, kernel-team, Lorenz Bauer

Remove the guard that disables the UDP tests, now that sockmap supports them.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
 tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 68d452bb9fd9..4c09766344a4 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -816,13 +816,6 @@ static void test_config(int sotype, sa_family_t family, bool inany)
 		if (!test__start_subtest(s))
 			continue;
 
-		if (sotype == SOCK_DGRAM &&
-		    inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
-			/* SOCKMAP/SOCKHASH don't support UDP yet */
-			test__skip();
-			continue;
-		}
-
 		setup_per_test(sotype, family, inany, t->no_inner_map);
 		t->fn(sotype, family);
 		cleanup_per_test(t->no_inner_map);
-- 
2.20.1



* Re: [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets
  2020-02-25 13:56 ` [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets Lorenz Bauer
@ 2020-02-25 16:45   ` Song Liu
  0 siblings, 0 replies; 21+ messages in thread
From: Song Liu @ 2020-02-25 16:45 UTC (permalink / raw)
  To: Lorenz Bauer
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend, Networking,
	bpf, kernel-team

On Tue, Feb 25, 2020 at 5:57 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> The sock map code checks that a socket does not have an active upper
> layer protocol before inserting it into the map. This requires casting
> via inet_csk, which isn't valid for UDP sockets.
>
> Guard checks for ULP by checking inet_sk(sk)->is_icsk first.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>

Acked-by: Song Liu <songliubraving@fb.com>


* Re: [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP
  2020-02-25 13:56 ` [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP Lorenz Bauer
@ 2020-02-25 17:22   ` Song Liu
  2020-02-25 22:15   ` kbuild test robot
  1 sibling, 0 replies; 21+ messages in thread
From: Song Liu @ 2020-02-25 17:22 UTC (permalink / raw)
  To: Lorenz Bauer
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend, Networking,
	bpf, kernel-team

On Tue, Feb 25, 2020 at 5:57 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> The close, unhash and clone handlers from TCP sockmap are actually generic,
> and can be reused by UDP sockmap. Move the helpers into the sockmap code base
> and expose them.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  include/linux/bpf.h   |  4 ++-
>  include/linux/skmsg.h | 28 ----------------
>  net/core/sock_map.c   | 77 +++++++++++++++++++++++++++++++++++++++++--
>  net/ipv4/tcp_bpf.c    | 55 ++-----------------------------
>  4 files changed, 79 insertions(+), 85 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 49b1a70e12c8..8422ab6a63d8 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1355,6 +1355,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
>  #if defined(CONFIG_BPF_STREAM_PARSER)
>  int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
>  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
> +void sock_map_unhash(struct sock *sk);
> +void sock_map_close(struct sock *sk, long timeout);

I think we still need dummy versions of these two functions in the #else
clause?


* Re: [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP
  2020-02-25 13:56 ` [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP Lorenz Bauer
  2020-02-25 17:22   ` Song Liu
@ 2020-02-25 22:15   ` kbuild test robot
  1 sibling, 0 replies; 21+ messages in thread
From: kbuild test robot @ 2020-02-25 22:15 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2706 bytes --]

Hi Lorenz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/master]
[also build test ERROR on next-20200225]
[cannot apply to bpf-next/master bpf/master net/master linus/master v5.6-rc3]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Lorenz-Bauer/bpf-sockmap-sockhash-support-storing-UDP-sockets/20200226-020708
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 3b3e808cd883dd2e39c85e6d8debc0020b5ef5e7
config: m68k-sun3_defconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net//ipv4/tcp_bpf.c: In function 'tcp_bpf_rebuild_protos':
>> net//ipv4/tcp_bpf.c:551:31: error: 'sock_map_unhash' undeclared (first use in this function); did you mean 'lock_map_release'?
     prot[TCP_BPF_BASE].unhash  = sock_map_unhash;
                                  ^~~~~~~~~~~~~~~
                                  lock_map_release
   net//ipv4/tcp_bpf.c:551:31: note: each undeclared identifier is reported only once for each function it appears in
>> net//ipv4/tcp_bpf.c:552:30: error: 'sock_map_close' undeclared (first use in this function); did you mean 'sock_map_unhash'?
     prot[TCP_BPF_BASE].close  = sock_map_close;
                                 ^~~~~~~~~~~~~~
                                 sock_map_unhash

vim +551 net//ipv4/tcp_bpf.c

   546	
   547	static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
   548					   struct proto *base)
   549	{
   550		prot[TCP_BPF_BASE]			= *base;
 > 551		prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
 > 552		prot[TCP_BPF_BASE].close		= sock_map_close;
   553		prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
   554		prot[TCP_BPF_BASE].stream_memory_read	= tcp_bpf_stream_read;
   555	
   556		prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
   557		prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
   558		prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
   559	}
   560	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 13182 bytes --]


* Re: [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets
  2020-02-25 13:56 ` [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets Lorenz Bauer
@ 2020-02-25 23:36   ` kbuild test robot
  2020-02-26 18:47   ` Martin KaFai Lau
  1 sibling, 0 replies; 21+ messages in thread
From: kbuild test robot @ 2020-02-25 23:36 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2277 bytes --]

Hi Lorenz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/master]
[also build test ERROR on next-20200225]
[cannot apply to bpf-next/master bpf/master net/master linus/master v5.6-rc3]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Lorenz-Bauer/bpf-sockmap-sockhash-support-storing-UDP-sockets/20200226-020708
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 3b3e808cd883dd2e39c85e6d8debc0020b5ef5e7
config: m68k-sun3_defconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=m68k 

If you fix the issue, kindly add the following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net//ipv4/udp_bpf.c: In function 'udp_bpf_rebuild_protos':
>> net//ipv4/udp_bpf.c:16:17: error: 'sock_map_unhash' undeclared (first use in this function); did you mean 'lock_map_release'?
     prot->unhash = sock_map_unhash;
                    ^~~~~~~~~~~~~~~
                    lock_map_release
   net//ipv4/udp_bpf.c:16:17: note: each undeclared identifier is reported only once for each function it appears in
>> net//ipv4/udp_bpf.c:17:17: error: 'sock_map_close' undeclared (first use in this function); did you mean 'sock_map_unhash'?
     prot->close  = sock_map_close;
                    ^~~~~~~~~~~~~~
                    sock_map_unhash

vim +16 net//ipv4/udp_bpf.c

    12	
    13	static int udp_bpf_rebuild_protos(struct proto *prot, struct proto *base)
    14	{
    15		*prot        = *base;
  > 16		prot->unhash = sock_map_unhash;
  > 17		prot->close  = sock_map_close;
    18		return 0;
    19	}
    20	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 13182 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests
  2020-02-25 13:56 ` [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests Lorenz Bauer
@ 2020-02-26 13:12   ` Jakub Sitnicki
  0 siblings, 0 replies; 21+ messages in thread
From: Jakub Sitnicki @ 2020-02-26 13:12 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 02:56 PM CET, Lorenz Bauer wrote:
> Remove the guard that disables UDP tests now that sockmap has support for them.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 7 -------
>  1 file changed, 7 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
> index 68d452bb9fd9..4c09766344a4 100644
> --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
> +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
> @@ -816,13 +816,6 @@ static void test_config(int sotype, sa_family_t family, bool inany)
>  		if (!test__start_subtest(s))
>  			continue;
>
> -		if (sotype == SOCK_DGRAM &&
> -		    inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
> -			/* SOCKMAP/SOCKHASH don't support UDP yet */
> -			test__skip();
> -			continue;
> -		}
> -
>  		setup_per_test(sotype, family, inany, t->no_inner_map);
>  		t->fn(sotype, family);
>  		cleanup_per_test(t->no_inner_map);

This one will need a respin due to 779e422d1198 ("selftests/bpf: Run
reuseport tests only with supported socket types") that landed recently
in bpf-next.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
@ 2020-02-26 14:57   ` Jakub Sitnicki
  2020-02-26 18:37   ` Martin KaFai Lau
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 21+ messages in thread
From: Jakub Sitnicki @ 2020-02-26 14:57 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 02:56 PM CET, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
>
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  include/linux/skmsg.h |  36 ++++++++-----
>  include/net/tcp.h     |   1 -
>  net/core/skmsg.c      |  52 +++++++++++++++++++
>  net/core/sock_map.c   |  24 ++++-----
>  net/ipv4/tcp_bpf.c    | 114 ++++++++++++------------------------------
>  5 files changed, 116 insertions(+), 111 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index c881094387db..70d65ab10b5c 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h

[...]

> @@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
>  	psock_set_prog(&progs->skb_verdict, NULL);
>  }
>  
> +static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
> +				       struct proto *ipv4_base)
> +{
> +	hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED();

We will need spin_lock_init() instead to play nice with CONFIG_DEBUG_SPINLOCK;
a sketch of that variant follows the quoted hunk below.

> +	return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);
> +
>  #endif /* _LINUX_SKMSG_H */

[...]
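For reference, a minimal sketch of the spin_lock_init() variant suggested
above; the rest of the function body is taken from the quoted hunk:

	static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
					      struct proto *ipv4_base)
	{
		/* Initialize the lock at runtime instead of assigning a
		 * static initializer; CONFIG_DEBUG_SPINLOCK is the concern
		 * raised above.
		 */
		spin_lock_init(&hooks->ipv6_lock);
		return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
	}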

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
  2020-02-26 14:57   ` Jakub Sitnicki
@ 2020-02-26 18:37   ` Martin KaFai Lau
  2020-02-28 10:48     ` Lorenz Bauer
  2020-02-27  9:27   ` Jakub Sitnicki
  2020-02-27  9:40   ` Jakub Sitnicki
  3 siblings, 1 reply; 21+ messages in thread
From: Martin KaFai Lau @ 2020-02-26 18:37 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 01:56:32PM +0000, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
> 
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
> 
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  include/linux/skmsg.h |  36 ++++++++-----
>  include/net/tcp.h     |   1 -
>  net/core/skmsg.c      |  52 +++++++++++++++++++
>  net/core/sock_map.c   |  24 ++++-----
>  net/ipv4/tcp_bpf.c    | 114 ++++++++++++------------------------------
>  5 files changed, 116 insertions(+), 111 deletions(-)
> 
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index c881094387db..70d65ab10b5c 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -109,6 +109,16 @@ struct sk_psock {
>  	};
>  };
>  
> +struct sk_psock_hooks {
> +	struct proto *base_ipv6;
> +	struct proto *ipv4;
> +	struct proto *ipv6;
> +	spinlock_t ipv6_lock;
> +	int (*rebuild_proto)(struct proto prot[], struct proto *base);
> +	struct proto *(*choose_proto)(struct proto prot[],
> +				      struct sk_psock *psock);
> +};
> +
>  int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
>  		 int elem_first_coalesce);
>  int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
> @@ -335,23 +345,14 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
>  	}
>  }
>  
> -static inline void sk_psock_update_proto(struct sock *sk,
> -					 struct sk_psock *psock,
> -					 struct proto *ops)
> -{
> -	psock->saved_unhash = sk->sk_prot->unhash;
> -	psock->saved_close = sk->sk_prot->close;
> -	psock->saved_write_space = sk->sk_write_space;
> -
> -	psock->sk_proto = sk->sk_prot;
> -	/* Pairs with lockless read in sk_clone_lock() */
> -	WRITE_ONCE(sk->sk_prot, ops);
> -}
> -
>  static inline void sk_psock_restore_proto(struct sock *sk,
>  					  struct sk_psock *psock)
>  {
> +	if (!psock->sk_proto)
> +		return;
> +
>  	sk->sk_prot->unhash = psock->saved_unhash;
> +
>  	if (inet_sk(sk)->is_icsk) {
>  		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
>  	} else {
> @@ -424,4 +425,13 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
>  	psock_set_prog(&progs->skb_verdict, NULL);
>  }
>  
> +static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
> +				       struct proto *ipv4_base)
> +{
> +	hooks->ipv6_lock = __SPIN_LOCK_UNLOCKED();
> +	return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);
> +
>  #endif /* _LINUX_SKMSG_H */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 07f947cc80e6..ccf39d80b695 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2196,7 +2196,6 @@ struct sk_msg;
>  struct sk_psock;
>  
>  int tcp_bpf_init(struct sock *sk);
> -void tcp_bpf_reinit(struct sock *sk);
>  int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
>  			  int flags);
>  int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index eeb28cb85664..a9bdf02c2539 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -844,3 +844,55 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
>  	strp_stop(&parser->strp);
>  	parser->enabled = false;
>  }
> +
> +static inline int sk_psock_hooks_init_ipv6(struct sk_psock_hooks *hooks,
> +					    struct proto *base)
> +{
> +	int ret = 0;
> +
> +	if (likely(base == smp_load_acquire(&hooks->base_ipv6)))
> +		return 0;
> +
> +	spin_lock_bh(&hooks->ipv6_lock);
> +	if (likely(base != hooks->base_ipv6)) {
> +		ret = hooks->rebuild_proto(hooks->ipv6, base);
> +		if (!ret)
> +			smp_store_release(&hooks->base_ipv6, base);
> +	}
> +	spin_unlock_bh(&hooks->ipv6_lock);
> +	return ret;
> +}
> +
> +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk)
> +{
> +	struct sk_psock *psock = sk_psock(sk);
> +	struct proto *prot_base;
> +
> +	WARN_ON_ONCE(!rcu_read_lock_held());
Is this only for the earlier sk_psock(sk)?

> +
> +	if (unlikely(!psock))
When will this happen?

> +		return -EINVAL;
> +
> +	/* Initialize saved callbacks and original proto only once.
> +	 * Since we've not installed the hooks, psock is not yet in use and
> +	 * we can initialize it without synchronization.
> +	 */
> +	if (!psock->sk_proto) {
If I read it correctly, this is meant to replace tcp_bpf_reinit_sk_prot()?

I think it would also be useful to keep some of the current reinit comment:

/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed ... */

> +		struct proto *prot = READ_ONCE(sk->sk_prot);
> +
> +		if (sk->sk_family == AF_INET6 &&
> +		    sk_psock_hooks_init_ipv6(hooks, prot))
> +			return -EINVAL;
> +
> +		psock->saved_unhash = prot->unhash;
> +		psock->saved_close = prot->close;
> +		psock->saved_write_space = sk->sk_write_space;
> +
> +		psock->sk_proto = prot;
> +	}
> +
> +	/* Pairs with lockless read in sk_clone_lock() */
> +	prot_base = sk->sk_family == AF_INET ? hooks->ipv4 : hooks->ipv6;
> +	WRITE_ONCE(sk->sk_prot, hooks->choose_proto(prot_base, psock));
> +	return 0;
> +}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets
  2020-02-25 13:56 ` [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets Lorenz Bauer
  2020-02-25 23:36   ` kbuild test robot
@ 2020-02-26 18:47   ` Martin KaFai Lau
  1 sibling, 0 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2020-02-26 18:47 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 01:56:33PM +0000, Lorenz Bauer wrote:
> Add basic psock hooks for UDP sockets. This allows adding and
> removing sockets, as well as automatic removal on unhash and close.
> 
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  MAINTAINERS         |  1 +
>  include/linux/udp.h |  4 ++++
>  net/core/sock_map.c | 47 +++++++++++++++++++++++-----------------
>  net/ipv4/Makefile   |  1 +
>  net/ipv4/udp_bpf.c  | 53 +++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 86 insertions(+), 20 deletions(-)
>  create mode 100644 net/ipv4/udp_bpf.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 2af5fa73155e..495ba52038ad 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9358,6 +9358,7 @@ F:	include/linux/skmsg.h
>  F:	net/core/skmsg.c
>  F:	net/core/sock_map.c
>  F:	net/ipv4/tcp_bpf.c
> +F:	net/ipv4/udp_bpf.c
>  
>  LANTIQ / INTEL Ethernet drivers
>  M:	Hauke Mehrtens <hauke@hauke-m.de>
> diff --git a/include/linux/udp.h b/include/linux/udp.h
> index aa84597bdc33..d90d8fd5f73d 100644
> --- a/include/linux/udp.h
> +++ b/include/linux/udp.h
> @@ -143,4 +143,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
>  
>  #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
>  
> +#if defined(CONFIG_NET_SOCK_MSG)
> +int udp_bpf_init(struct sock *sk);
> +#endif
> +
>  #endif	/* _LINUX_UDP_H */
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index c84cc9fc7f6b..f998192c425f 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -153,7 +153,7 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
>  	rcu_read_lock();
>  	psock = sk_psock(sk);
>  	if (psock) {
> -		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
> +		if (sk->sk_prot->close != sock_map_close) {
>  			psock = ERR_PTR(-EBUSY);
>  			goto out;
>  		}
> @@ -166,6 +166,14 @@ static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
>  	return psock;
>  }
>  
> +static int sock_map_init_hooks(struct sock *sk)
> +{
> +	if (sk->sk_type == SOCK_DGRAM)
> +		return udp_bpf_init(sk);
> +	else
> +		return tcp_bpf_init(sk);
> +}
> +
>  static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
>  			 struct sock *sk)
>  {
> @@ -220,7 +228,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
>  	if (msg_parser)
>  		psock_set_prog(&psock->progs.msg_parser, msg_parser);
>  
> -	ret = tcp_bpf_init(sk);
> +	ret = sock_map_init_hooks(sk);
>  	if (ret < 0)
>  		goto out_drop;
>  
> @@ -267,7 +275,7 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
>  		return -ENOMEM;
>  
>  init:
> -	ret = tcp_bpf_init(sk);
> +	ret = sock_map_init_hooks(sk);
>  	if (ret < 0)
>  		sk_psock_put(sk, psock);
>  	return ret;
> @@ -394,9 +402,14 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
>  	return 0;
>  }
>  
> +static bool sock_map_sk_is_tcp(const struct sock *sk)
> +{
> +	return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
> +}
> +
>  static bool sock_map_redirect_allowed(const struct sock *sk)
>  {
> -	return sk->sk_state != TCP_LISTEN;
> +	return sock_map_sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
>  }
>  
>  static int sock_map_update_common(struct bpf_map *map, u32 idx,
> @@ -466,15 +479,17 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
>  	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
>  }
>  
> +static bool sock_map_sk_is_udp(const struct sock *sk)
> +{
> +	return sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP;
> +}
> +
>  static bool sock_map_sk_is_suitable(const struct sock *sk)
>  {
> -	return sk->sk_type == SOCK_STREAM &&
> -	       sk->sk_protocol == IPPROTO_TCP;
> -}
> +	const int tcp_flags = TCPF_ESTABLISHED | TCPF_LISTEN | TCPF_SYN_RECV;
hmm... I thought this patch is only about adding UDP.  However, if I read
it correctly, the TCP state mask is also changed (| TCPF_SYN_RECV is added)?
Please elaborate in the commit message.

>  
> -static bool sock_map_sk_state_allowed(const struct sock *sk)
> -{
> -	return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
> +	return (sock_map_sk_is_udp(sk) && sk_hashed(sk)) ||
> +	       (sock_map_sk_is_tcp(sk) && (1 << sk->sk_state) & tcp_flags);
>  }
>  
>  static int sock_map_update_elem(struct bpf_map *map, void *key,

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
  2020-02-26 14:57   ` Jakub Sitnicki
  2020-02-26 18:37   ` Martin KaFai Lau
@ 2020-02-27  9:27   ` Jakub Sitnicki
  2020-02-27  9:40   ` Jakub Sitnicki
  3 siblings, 0 replies; 21+ messages in thread
From: Jakub Sitnicki @ 2020-02-27  9:27 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 02:56 PM CET, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
>
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  include/linux/skmsg.h |  36 ++++++++-----
>  include/net/tcp.h     |   1 -
>  net/core/skmsg.c      |  52 +++++++++++++++++++
>  net/core/sock_map.c   |  24 ++++-----
>  net/ipv4/tcp_bpf.c    | 114 ++++++++++++------------------------------
>  5 files changed, 116 insertions(+), 111 deletions(-)
>

[...]

> diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
> index 90955c96a9a8..81c0431a8dbd 100644
> --- a/net/ipv4/tcp_bpf.c
> +++ b/net/ipv4/tcp_bpf.c

[...]

> @@ -650,9 +599,10 @@ int tcp_bpf_init(struct sock *sk)
>   */
>  void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
>  {
> -	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
> -	struct proto *prot = newsk->sk_prot;
> +	struct proto *prot = READ_ONCE(sk->sk_prot);

For the sake of keeping the review open - we've identified a regression
here: sk->sk_prot can change between the moment we copied the listener
sock and the moment we get here. We need to stick to checking
newsk->sk_prot instead; a sketch of that variant follows the quoted hunk
below.

>
> -	if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
> +	/* TCP_LISTEN can only use TCP_BPF_BASE, so this is safe */
> +	if (unlikely(prot == &tcp_bpf_ipv4[TCP_BPF_BASE] ||
> +	             prot == &tcp_bpf_ipv6[TCP_BPF_BASE]))
>  		newsk->sk_prot = sk->sk_prot_creator;
>  }
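For reference, a minimal sketch of that direction: read the child's proto,
which was copied from the listener at clone time, instead of re-reading the
parent's. The tcp_bpf_ipv4/tcp_bpf_ipv6 array names follow the quoted hunk;
this is only a sketch, not the eventual fix:

	void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
	{
		/* newsk->sk_prot was copied from the listener at clone time
		 * and is private to us here, so it cannot change underneath
		 * us the way the listener's sk->sk_prot can.
		 */
		struct proto *prot = newsk->sk_prot;

		/* TCP_LISTEN can only use TCP_BPF_BASE, so this is safe */
		if (unlikely(prot == &tcp_bpf_ipv4[TCP_BPF_BASE] ||
			     prot == &tcp_bpf_ipv6[TCP_BPF_BASE]))
			newsk->sk_prot = sk->sk_prot_creator;
	}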

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
                     ` (2 preceding siblings ...)
  2020-02-27  9:27   ` Jakub Sitnicki
@ 2020-02-27  9:40   ` Jakub Sitnicki
  3 siblings, 0 replies; 21+ messages in thread
From: Jakub Sitnicki @ 2020-02-27  9:40 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 02:56 PM CET, Lorenz Bauer wrote:
> The sockmap works by overriding some of the callbacks in sk->sk_prot, while
> leaving others untouched. This means that we need access to the struct proto
> for any protocol we want to support. For IPv4 this is trivial, since both
> TCP and UDP are always compiled in. IPv6 may be disabled or compiled as a
> module, so the existing TCP sockmap hooks use some trickery to lazily
> initialize the modified struct proto for TCPv6.
>
> Pull this logic into a standalone struct sk_psock_hooks, so that it can be
> re-used by UDP sockmap.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---

I've been looking at how to simplify this a bit. One thing that seems
like an easy win is to fold sk_psock_hooks_init into its callers. Then
we can go back to using spinlock initializer macros. Patch below.

This highlights some inconsistency in naming instances of
sk_psock_hooks, that is tcp_bpf_hooks vs udp_psock_proto.

---
 include/linux/skmsg.h | 7 -------
 net/ipv4/tcp_bpf.c    | 3 ++-
 net/ipv4/udp_bpf.c    | 3 ++-
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 174c76c725fb..4566724dc0c9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -425,13 +425,6 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
 	psock_set_prog(&progs->skb_verdict, NULL);
 }

-static inline int sk_psock_hooks_init(struct sk_psock_hooks *hooks,
-				       struct proto *ipv4_base)
-{
-	spin_lock_init(&hooks->ipv6_lock);
-	return hooks->rebuild_proto(hooks->ipv4, ipv4_base);
-}
-
 int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk);

 #endif /* _LINUX_SKMSG_H */
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index fa7e474b981b..5cb9a0724cf6 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -570,13 +570,14 @@ static struct proto tcp_bpf_ipv6[TCP_BPF_NUM_CFGS];
 static struct sk_psock_hooks tcp_bpf_hooks __read_mostly = {
 	.ipv4 = &tcp_bpf_ipv4[0],
 	.ipv6 = &tcp_bpf_ipv6[0],
+	.ipv6_lock = __SPIN_LOCK_UNLOCKED(tcp_bpf_hooks.ipv6_lock),
 	.rebuild_proto = tcp_bpf_rebuild_proto,
 	.choose_proto = tcp_bpf_choose_proto,
 };

 static int __init tcp_bpf_init_psock_hooks(void)
 {
-	return sk_psock_hooks_init(&tcp_bpf_hooks, &tcp_prot);
+	return tcp_bpf_rebuild_proto(tcp_bpf_ipv4, &tcp_prot);
 }
 core_initcall(tcp_bpf_init_psock_hooks);

diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index e085a0648a94..da5eb1d2265d 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -30,13 +30,14 @@ static struct proto udpv6_proto;
 static struct sk_psock_hooks udp_psock_proto __read_mostly = {
 	.ipv4 = &udpv4_proto,
 	.ipv6 = &udpv6_proto,
+	.ipv6_lock = __SPIN_LOCK_UNLOCKED(udp_psock_proto.ipv6_lock),
 	.rebuild_proto = udp_bpf_rebuild_protos,
 	.choose_proto = udp_bpf_choose_proto,
 };

 static int __init udp_bpf_init_psock_hooks(void)
 {
-	return sk_psock_hooks_init(&udp_psock_proto, &udp_prot);
+	return udp_bpf_rebuild_protos(&udpv4_proto, &udp_prot);
 }
 core_initcall(udp_bpf_init_psock_hooks);

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap
  2020-02-25 13:56 ` [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap Lorenz Bauer
@ 2020-02-27 11:49   ` Jakub Sitnicki
  2020-02-27 12:02     ` Lorenz Bauer
  0 siblings, 1 reply; 21+ messages in thread
From: Jakub Sitnicki @ 2020-02-27 11:49 UTC (permalink / raw)
  To: Lorenz Bauer; +Cc: ast, daniel, john.fastabend, netdev, bpf, kernel-team

On Tue, Feb 25, 2020 at 02:56 PM CET, Lorenz Bauer wrote:
> Expand the TCP sockmap test suite to also check UDP sockets.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
>  .../selftests/bpf/prog_tests/sockmap_listen.c | 92 +++++++++++++------
>  1 file changed, 63 insertions(+), 29 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> index 4ba41dd26d6b..72e578a5a5d2 100644
> --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> @@ -330,7 +330,7 @@ static void test_insert_bound(int family, int sotype, int mapfd)
>  	xclose(s);
>  }
>
> -static void test_insert_listening(int family, int sotype, int mapfd)
> +static void test_insert(int family, int sotype, int mapfd)
>  {
>  	u64 value;
>  	u32 key;
> @@ -467,7 +467,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
>  	xclose(s);
>  }
>
> -static void test_update_listening(int family, int sotype, int mapfd)
> +static void test_update_existing(int family, int sotype, int mapfd)
>  {
>  	int s1, s2;
>  	u64 value;
> @@ -1302,11 +1302,15 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
>  	xclose(s1);
>  }
>
> -#define TEST(fn)                                                               \
> +#define TEST_SOTYPE(fn, sotype)                                                \
>  	{                                                                      \
> -		fn, #fn                                                        \
> +		fn, #fn, sotype                                                \
>  	}
>
> +#define TEST(fn) TEST_SOTYPE(fn, 0)
> +#define TEST_STREAM(fn) TEST_SOTYPE(fn, SOCK_STREAM)
> +#define TEST_DGRAM(fn) TEST_SOTYPE(fn, SOCK_DGRAM)
> +

An alternative would be to use __VA_ARGS__ and designated initializers,
as I did recently in e0360423d020 ("selftests/bpf: Run SYN cookies with
reuseport BPF test only for TCP"). TEST_DGRAM is unused at the moment,
so that's something to consider.
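
For illustration, a rough sketch of what that could look like with the
struct op_test fields from the hunk above (the exact form used in
e0360423d020 may differ):

	#define TEST(fn, ...)                                                  \
		{                                                              \
			.name = #fn,                                           \
			.fn = fn,                                              \
			__VA_ARGS__                                            \
		}

	/* e.g.:
	 *	TEST(test_insert_opened),
	 *	TEST(test_insert_bound, .sotype = SOCK_STREAM),
	 */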

>  static void test_ops_cleanup(const struct bpf_map *map)
>  {
>  	const struct bpf_map_def *def;
> @@ -1353,18 +1357,31 @@ static const char *map_type_str(const struct bpf_map *map)
>  	}
>  }
>
> +static const char *sotype_str(int sotype)
> +{
> +	switch (sotype) {
> +	case SOCK_DGRAM:
> +		return "UDP";
> +	case SOCK_STREAM:
> +		return "TCP";
> +	default:
> +		return "unknown";
> +	}
> +}
> +
>  static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
>  		     int family, int sotype)
>  {
>  	const struct op_test {
>  		void (*fn)(int family, int sotype, int mapfd);
>  		const char *name;
> +		int sotype;
>  	} tests[] = {
>  		/* insert */
>  		TEST(test_insert_invalid),
>  		TEST(test_insert_opened),
> -		TEST(test_insert_bound),
> -		TEST(test_insert_listening),
> +		TEST_STREAM(test_insert_bound),
> +		TEST(test_insert),
>  		/* delete */
>  		TEST(test_delete_after_insert),
>  		TEST(test_delete_after_close),
> @@ -1373,28 +1390,33 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
>  		TEST(test_lookup_after_delete),
>  		TEST(test_lookup_32_bit_value),
>  		/* update */
> -		TEST(test_update_listening),
> +		TEST(test_update_existing),
>  		/* races with insert/delete */
> -		TEST(test_destroy_orphan_child),
> -		TEST(test_syn_recv_insert_delete),
> -		TEST(test_race_insert_listen),
> +		TEST_STREAM(test_destroy_orphan_child),
> +		TEST_STREAM(test_syn_recv_insert_delete),
> +		TEST_STREAM(test_race_insert_listen),
>  		/* child clone */
> -		TEST(test_clone_after_delete),
> -		TEST(test_accept_after_delete),
> -		TEST(test_accept_before_delete),
> +		TEST_STREAM(test_clone_after_delete),
> +		TEST_STREAM(test_accept_after_delete),
> +		TEST_STREAM(test_accept_before_delete),
>  	};
> -	const char *family_name, *map_name;
> +	const char *family_name, *map_name, *sotype_name;
>  	const struct op_test *t;
>  	char s[MAX_TEST_NAME];
>  	int map_fd;
>
>  	family_name = family_str(family);
>  	map_name = map_type_str(map);
> +	sotype_name = sotype_str(sotype);
>  	map_fd = bpf_map__fd(map);
>
> +
>  	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
> -		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
> -			 t->name);
> +		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
> +			 sotype_name, t->name);
> +
> +		if (t->sotype != 0 && t->sotype != sotype)
> +			continue;
>
>  		if (!test__start_subtest(s))
>  			continue;
> @@ -1411,22 +1433,28 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
>  		void (*fn)(struct test_sockmap_listen *skel,
>  			   struct bpf_map *map, int family, int sotype);
>  		const char *name;
> +		int sotype;
>  	} tests[] = {
> -		TEST(test_skb_redir_to_connected),
> -		TEST(test_skb_redir_to_listening),
> -		TEST(test_msg_redir_to_connected),
> -		TEST(test_msg_redir_to_listening),
> +		TEST_STREAM(test_skb_redir_to_connected),
> +		TEST_STREAM(test_skb_redir_to_listening),
> +		TEST_STREAM(test_msg_redir_to_connected),
> +		TEST_STREAM(test_msg_redir_to_listening),
>  	};
> -	const char *family_name, *map_name;
> +	const char *family_name, *map_name, *sotype_name;
>  	const struct redir_test *t;
>  	char s[MAX_TEST_NAME];
>
>  	family_name = family_str(family);
>  	map_name = map_type_str(map);
> +	sotype_name = sotype_str(sotype);
>
>  	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
> -		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
> -			 t->name);
> +		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
> +			 sotype_name, t->name);
> +
> +		if (t->sotype != 0 && t->sotype != sotype)
> +			continue;
> +
>  		if (!test__start_subtest(s))
>  			continue;
>

test_redir() doesn't get called with SOCK_DGRAM, because redirection
with sockmap and UDP is not supported yet, so perhaps no need to touch
this function. 

> @@ -1441,26 +1469,31 @@ static void test_reuseport(struct test_sockmap_listen *skel,
>  		void (*fn)(int family, int sotype, int socket_map,
>  			   int verdict_map, int reuseport_prog);
>  		const char *name;
> +		int sotype;
>  	} tests[] = {
> -		TEST(test_reuseport_select_listening),
> -		TEST(test_reuseport_select_connected),
> -		TEST(test_reuseport_mixed_groups),
> +		TEST_STREAM(test_reuseport_select_listening),
> +		TEST_STREAM(test_reuseport_select_connected),
> +		TEST_STREAM(test_reuseport_mixed_groups),
>  	};
>  	int socket_map, verdict_map, reuseport_prog;
> -	const char *family_name, *map_name;
> +	const char *family_name, *map_name, *sotype_name;
>  	const struct reuseport_test *t;
>  	char s[MAX_TEST_NAME];
>
>  	family_name = family_str(family);
>  	map_name = map_type_str(map);
> +	sotype_name = sotype_str(sotype);
>
>  	socket_map = bpf_map__fd(map);
>  	verdict_map = bpf_map__fd(skel->maps.verdict_map);
>  	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
>
>  	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
> -		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
> -			 t->name);
> +		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
> +			 sotype_name, t->name);
> +
> +		if (t->sotype != 0 && t->sotype != sotype)
> +			continue;
>
>  		if (!test__start_subtest(s))
>  			continue;
> @@ -1473,6 +1506,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
>  		      int family)
>  {
>  	test_ops(skel, map, family, SOCK_STREAM);
> +	test_ops(skel, map, family, SOCK_DGRAM);
>  	test_redir(skel, map, family, SOCK_STREAM);
>  	test_reuseport(skel, map, family, SOCK_STREAM);
>  }

Looks like we can enable the reuseport tests without too much effort.
I managed to get them working with the changes below.

---
 .../selftests/bpf/prog_tests/sockmap_listen.c | 102 +++++++++++++++---
 1 file changed, 87 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 72e578a5a5d2..6850994fc4d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -108,6 +108,22 @@
 		__ret;                                                         \
 	})
 
+#define xsend(fd, buf, len, flags)                                             \
+	({                                                                     \
+		ssize_t __ret = send((fd), (buf), (len), (flags));             \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("send");                                    \
+		__ret;                                                         \
+	})
+
+#define xrecv(fd, buf, len, flags)                                             \
+	({                                                                     \
+		ssize_t __ret = recv((fd), (buf), (len), (flags));             \
+		if (__ret == -1)                                               \
+			FAIL_ERRNO("recv");                                    \
+		__ret;                                                         \
+	})
+
 #define xsocket(family, sotype, flags)                                         \
 	({                                                                     \
 		int __ret = socket(family, sotype, flags);                     \
@@ -1116,7 +1132,7 @@ static void test_reuseport_select_listening(int family, int sotype,
 {
 	struct sockaddr_storage addr;
 	unsigned int pass;
-	int s, c, p, err;
+	int s, c, err;
 	socklen_t len;
 	u64 value;
 	u32 key;
@@ -1145,19 +1161,33 @@ static void test_reuseport_select_listening(int family, int sotype,
 	if (err)
 		goto close_cli;
 
-	p = xaccept(s, NULL, NULL);
-	if (p < 0)
-		goto close_cli;
+	if (sotype == SOCK_STREAM) {
+		int p;
+
+		p = xaccept(s, NULL, NULL);
+		if (p < 0)
+			goto close_cli;
+		xclose(p);
+	} else {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+
+		n = xrecv(s, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+	}
 
 	key = SK_PASS;
 	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
 	if (err)
-		goto close_peer;
+		goto close_cli;
 	if (pass != 1)
 		FAIL("want pass count 1, have %d", pass);
 
-close_peer:
-	xclose(p);
 close_cli:
 	xclose(c);
 close_srv:
@@ -1201,9 +1231,24 @@ static void test_reuseport_select_connected(int family, int sotype,
 	if (err)
 		goto close_cli0;
 
-	p0 = xaccept(s, NULL, NULL);
-	if (err)
-		goto close_cli0;
+	if (sotype == SOCK_STREAM) {
+		p0 = xaccept(s, NULL, NULL);
+		if (p0 < 0)
+			goto close_cli0;
+	} else {
+		p0 = xsocket(family, sotype, 0);
+		if (p0 < 0)
+			goto close_cli0;
+
+		len = sizeof(addr);
+		err = xgetsockname(c0, sockaddr(&addr), &len);
+		if (err)
+			goto close_cli0;
+
+		err = xconnect(p0, sockaddr(&addr), len);
+		if (err)
+			goto close_cli0;
+	}
 
 	/* Update sock_map[0] to redirect to a connected socket */
 	key = 0;
@@ -1216,8 +1261,24 @@ static void test_reuseport_select_connected(int family, int sotype,
 	if (c1 < 0)
 		goto close_peer0;
 
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		goto close_srv;
+
 	errno = 0;
 	err = connect(c1, sockaddr(&addr), len);
+	if (sotype == SOCK_DGRAM) {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c1, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli1;
+
+		n = recv(c1, &b, sizeof(b), 0);
+		err = n == -1;
+	}
 	if (!err || errno != ECONNREFUSED)
 		FAIL_ERRNO("connect: expected ECONNREFUSED");
 
@@ -1281,7 +1342,18 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
 		goto close_srv2;
 
 	err = connect(c, sockaddr(&addr), len);
-	if (err && errno != ECONNREFUSED) {
+	if (sotype == SOCK_DGRAM) {
+		char b = 'a';
+		ssize_t n;
+
+		n = xsend(c, &b, sizeof(b), 0);
+		if (n == -1)
+			goto close_cli;
+
+		n = recv(c, &b, sizeof(b), 0);
+		err = n == -1;
+	}
+	if (!err || errno != ECONNREFUSED) {
 		FAIL_ERRNO("connect: expected ECONNREFUSED");
 		goto close_cli;
 	}
@@ -1410,7 +1482,6 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
 	sotype_name = sotype_str(sotype);
 	map_fd = bpf_map__fd(map);
 
-
 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
 			 sotype_name, t->name);
@@ -1471,9 +1542,9 @@ static void test_reuseport(struct test_sockmap_listen *skel,
 		const char *name;
 		int sotype;
 	} tests[] = {
-		TEST_STREAM(test_reuseport_select_listening),
-		TEST_STREAM(test_reuseport_select_connected),
-		TEST_STREAM(test_reuseport_mixed_groups),
+		TEST(test_reuseport_select_listening),
+		TEST(test_reuseport_select_connected),
+		TEST(test_reuseport_mixed_groups),
 	};
 	int socket_map, verdict_map, reuseport_prog;
 	const char *family_name, *map_name, *sotype_name;
@@ -1509,6 +1580,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 	test_ops(skel, map, family, SOCK_DGRAM);
 	test_redir(skel, map, family, SOCK_STREAM);
 	test_reuseport(skel, map, family, SOCK_STREAM);
+	test_reuseport(skel, map, family, SOCK_DGRAM);
 }
 
 void test_sockmap_listen(void)
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap
  2020-02-27 11:49   ` Jakub Sitnicki
@ 2020-02-27 12:02     ` Lorenz Bauer
  0 siblings, 0 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-27 12:02 UTC (permalink / raw)
  To: Jakub Sitnicki
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend, Networking,
	bpf, kernel-team

On Thu, 27 Feb 2020 at 11:49, Jakub Sitnicki <jakub@cloudflare.com> wrote:
> > +#define TEST(fn) TEST_SOTYPE(fn, 0)
> > +#define TEST_STREAM(fn) TEST_SOTYPE(fn, SOCK_STREAM)
> > +#define TEST_DGRAM(fn) TEST_SOTYPE(fn, SOCK_DGRAM)
> > +
>
> An alternative would be to use __VA_ARGS__ and designated intializers,
> as I did recently in e0360423d020 ("selftests/bpf: Run SYN cookies with
> reuseport BPF test only for TCP"). TEST_DGRAM is unused at the moment,
> so that's something to consider.

Good idea, I'll pick it up!

> test_redir() doesn't get called with SOCK_DGRAM, because redirection
> with sockmap and UDP is not supported yet, so perhaps no need to touch
> this function.

I did this to avoid needing another macro; it should be possible to skip it
with your __VA_ARGS__ idea.

> Looks like we can enable reuseport tests with not too much effort.
> I managed to get them working with the changes below.

Thanks, I'll add this to the next revision!

-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks
  2020-02-26 18:37   ` Martin KaFai Lau
@ 2020-02-28 10:48     ` Lorenz Bauer
  0 siblings, 0 replies; 21+ messages in thread
From: Lorenz Bauer @ 2020-02-28 10:48 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend, Networking,
	bpf, kernel-team

On Wed, 26 Feb 2020 at 18:37, Martin KaFai Lau <kafai@fb.com> wrote:
>
> > +int sk_psock_hooks_install(struct sk_psock_hooks *hooks, struct sock *sk)
> > +{
> > +     struct sk_psock *psock = sk_psock(sk);
> > +     struct proto *prot_base;
> > +
> > +     WARN_ON_ONCE(!rcu_read_lock_held());
> Is this only for the earlier sk_psock(sk)?

The function is an amalgamation of tcp_bpf_reinit and tcp_bpf_init,
which both take the
read lock. I figured it would make sense to assert this behaviour in
sk_psock_hooks_install.

>
> > +
> > +     if (unlikely(!psock))
> When will this happen?

I don't know, to be honest; this is adapted from tcp_bpf_init:

       psock = sk_psock(sk);
       if (unlikely(!psock || psock->sk_proto ||
                    tcp_bpf_assert_proto_ops(ops))) {
               rcu_read_unlock();
               return -EINVAL;
       }

>
> > +             return -EINVAL;
> > +
> > +     /* Initialize saved callbacks and original proto only once.
> > +      * Since we've not installed the hooks, psock is not yet in use and
> > +      * we can initialize it without synchronization.
> > +      */
> > +     if (!psock->sk_proto) {
> If I read it correctly, this is to replace the tcp_bpf_reinit_sk_prot()?
>
> I think some of the current reinit comment is useful to keep also:
>
> /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed ... */

Ack, I will elaborate.

-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2020-02-28 10:48 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25 13:56 [PATCH bpf-next 0/7] bpf: sockmap, sockhash: support storing UDP sockets Lorenz Bauer
2020-02-25 13:56 ` [PATCH bpf-next 1/7] bpf: sockmap: only check ULP for TCP sockets Lorenz Bauer
2020-02-25 16:45   ` Song Liu
2020-02-25 13:56 ` [PATCH bpf-next 2/7] bpf: sockmap: move generic sockmap hooks from BPF TCP Lorenz Bauer
2020-02-25 17:22   ` Song Liu
2020-02-25 22:15   ` kbuild test robot
2020-02-25 13:56 ` [PATCH bpf-next 3/7] skmsg: introduce sk_psock_hooks Lorenz Bauer
2020-02-26 14:57   ` Jakub Sitnicki
2020-02-26 18:37   ` Martin KaFai Lau
2020-02-28 10:48     ` Lorenz Bauer
2020-02-27  9:27   ` Jakub Sitnicki
2020-02-27  9:40   ` Jakub Sitnicki
2020-02-25 13:56 ` [PATCH bpf-next 4/7] bpf: sockmap: allow UDP sockets Lorenz Bauer
2020-02-25 23:36   ` kbuild test robot
2020-02-26 18:47   ` Martin KaFai Lau
2020-02-25 13:56 ` [PATCH bpf-next 5/7] selftests: bpf: don't listen() on " Lorenz Bauer
2020-02-25 13:56 ` [PATCH bpf-next 6/7] selftests: bpf: add tests for UDP sockets in sockmap Lorenz Bauer
2020-02-27 11:49   ` Jakub Sitnicki
2020-02-27 12:02     ` Lorenz Bauer
2020-02-25 13:56 ` [PATCH bpf-next 7/7] selftests: bpf: enable UDP sockmap reuseport tests Lorenz Bauer
2020-02-26 13:12   ` Jakub Sitnicki
