bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jakub Sitnicki <jakub@cloudflare.com>
To: bpf@vger.kernel.org
Cc: netdev@vger.kernel.org, kernel-team@cloudflare.com,
	Martin KaFai Lau <kafai@fb.com>
Subject: [PATCH bpf-next v5 05/12] bpf, sockmap: Allow inserting listening TCP sockets into sockmap
Date: Mon, 27 Jan 2020 13:55:27 +0100	[thread overview]
Message-ID: <20200127125534.137492-6-jakub@cloudflare.com> (raw)
In-Reply-To: <20200127125534.137492-1-jakub@cloudflare.com>

In order for sockmap type to become a generic collection for storing TCP
sockets we need to loosen the checks during map update, while tightening
the checks in redirect helpers.

Currently sockmap requires the TCP socket to be in established state (or
transitioning out of SYN_RECV into established state when done from BPF),
which prevents inserting listening sockets.

Change the update pre-checks so the socket can also be in listening state.

Since it doesn't make sense to redirect with sockmap to listening sockets,
add appropriate socket state checks to BPF redirect helpers too.

We leave sockhash as is for the moment, with no support for holding
listening sockets. Therefore sockhash needs its own set of checks.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
---
 net/core/sock_map.c                     | 62 +++++++++++++++++++------
 tools/testing/selftests/bpf/test_maps.c |  6 +--
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8998e356f423..954c4d23bc01 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -388,15 +388,44 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 }
 
 static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
+{
+	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
+}
+
+static bool sock_hash_op_okay(const struct bpf_sock_ops_kern *ops)
 {
 	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
 	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
 }
 
+/* Only TCP sockets can be inserted into the map. They must be either
+ * in established or listening state. SYN_RECV is also allowed because
+ * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB happens just before socket
+ * enters established state.
+ */
 static bool sock_map_sk_is_suitable(const struct sock *sk)
 {
 	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
+	       sk->sk_protocol == IPPROTO_TCP &&
+	       (1 << sk->sk_state) & (TCPF_ESTABLISHED |
+				      TCPF_SYN_RECV |
+				      TCPF_LISTEN);
+}
+
+static bool sock_hash_sk_is_suitable(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP &&
+	       (1 << sk->sk_state) & (TCPF_ESTABLISHED |
+				      TCPF_SYN_RECV);
+}
+
+/* Is sock in a state that allows redirecting into it? */
+static bool sock_map_redirect_okay(const struct sock *sk)
+{
+	return sk->sk_state != TCP_LISTEN;
 }
 
 static int sock_map_update_elem(struct bpf_map *map, void *key,
@@ -416,8 +445,7 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk) ||
-	    sk->sk_state != TCP_ESTABLISHED) {
+	if (!sock_map_sk_is_suitable(sk)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -457,13 +485,17 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+	struct sock *sk;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
+
+	sk = __sock_map_lookup_elem(map, key);
+	if (unlikely(!sk || !sock_map_redirect_okay(sk)))
 		return SK_DROP;
+
+	tcb->bpf.flags = flags;
+	tcb->bpf.sk_redir = sk;
 	return SK_PASS;
 }
 
@@ -480,12 +512,17 @@ const struct bpf_func_proto bpf_sk_redirect_map_proto = {
 BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
+	struct sock *sk;
+
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
-	msg->flags = flags;
-	msg->sk_redir = __sock_map_lookup_elem(map, key);
-	if (!msg->sk_redir)
+
+	sk = __sock_map_lookup_elem(map, key);
+	if (unlikely(!sk || !sock_map_redirect_okay(sk)))
 		return SK_DROP;
+
+	msg->flags = flags;
+	msg->sk_redir = sk;
 	return SK_PASS;
 }
 
@@ -739,8 +776,7 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (!sock_map_sk_is_suitable(sk) ||
-	    sk->sk_state != TCP_ESTABLISHED) {
+	if (!sock_hash_sk_is_suitable(sk)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -887,8 +923,8 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	if (likely(sock_map_sk_is_suitable(sops->sk) &&
-		   sock_map_op_okay(sops)))
+	if (likely(sock_hash_sk_is_suitable(sops->sk) &&
+		   sock_hash_op_okay(sops)))
 		return sock_hash_update_common(map, key, sops->sk, flags);
 	return -EOPNOTSUPP;
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 02eae1e864c2..c6766b2cff85 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	/* Test update without programs */
 	for (i = 0; i < 6; i++) {
 		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
-		if (i < 2 && !err) {
-			printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
-			       i, sfd[i]);
-			goto out_sockmap;
-		} else if (i >= 2 && err) {
+		if (err) {
 			printf("Failed noprog update sockmap '%i:%i'\n",
 			       i, sfd[i]);
 			goto out_sockmap;
-- 
2.24.1


  parent reply	other threads:[~2020-01-27 12:55 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-27 12:55 [PATCH bpf-next v5 00/12] Extend SOCKMAP to store listening sockets Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 01/12] bpf, sk_msg: Don't clear saved sock proto on restore Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 02/12] net, sk_msg: Annotate lockless access to sk_prot on clone Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 03/12] net, sk_msg: Clear sk_user_data pointer on clone if tagged Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 04/12] tcp_bpf: Don't let child socket inherit parent protocol ops on copy Jakub Sitnicki
2020-01-27 12:55 ` Jakub Sitnicki [this message]
2020-01-27 12:55 ` [PATCH bpf-next v5 06/12] bpf, sockmap: Don't set up sockmap progs for listening sockets Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 07/12] bpf, sockmap: Return socket cookie on lookup from syscall Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 08/12] bpf, sockmap: Let all kernel-land lookup values in SOCKMAP Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 09/12] bpf: Allow selecting reuseport socket from a SOCKMAP Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 10/12] net: Generate reuseport group ID on group creation Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 11/12] selftests/bpf: Extend SK_REUSEPORT tests to cover SOCKMAP Jakub Sitnicki
2020-01-27 12:55 ` [PATCH bpf-next v5 12/12] selftests/bpf: Tests for SOCKMAP holding listening sockets Jakub Sitnicki
2020-03-11 18:48   ` Andrii Nakryiko
2020-03-11 19:49     ` Jakub Sitnicki
2020-03-11 21:02       ` Andrii Nakryiko
2020-03-12 17:25         ` Jakub Sitnicki
2020-01-27 13:01 ` [PATCH bpf-next v5 00/12] Extend SOCKMAP to store " Jakub Sitnicki
2020-01-27 13:05 ` Jakub Sitnicki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200127125534.137492-6-jakub@cloudflare.com \
    --to=jakub@cloudflare.com \
    --cc=bpf@vger.kernel.org \
    --cc=kafai@fb.com \
    --cc=kernel-team@cloudflare.com \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH bpf-next v5 05/12] bpf, sockmap: Allow inserting listening TCP sockets into sockmap' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).