bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [bpf-next PATCH 0/3] fix ktls with sk_skb_verdict programs
@ 2020-05-29 23:06 John Fastabend
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: John Fastabend @ 2020-05-29 23:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, bpf, john.fastabend

If a socket is running a BPF_SK_SKB_STREAM_VERDICT program and KTLS is
enabled the data stream may be broken if both TLS stream parser and
BPF stream parser try to handle data. Fix this here by making KTLS
stream parser run first to ensure TLS messages are received correctly
and then calling the verdict program. This is analogous to how we handle
a similar conflict on the TX side.

Note, this is a fix but it doesn't make sense to push this late to
bpf tree so targeting bpf-next and keeping fixes tags.

---

John Fastabend (3):
      bpf: refactor sockmap redirect code so its easy to reuse
      bpf: fix running sk_skb program types with ktls
      bpf, selftests: add test for ktls with skb bpf ingress policy


 include/linux/skmsg.h                              |    8 +
 include/net/tls.h                                  |    9 +
 net/core/skmsg.c                                   |   98 +++++++++---
 net/tls/tls_sw.c                                   |   20 ++
 .../selftests/bpf/progs/test_sockmap_kern.h        |   46 ++++++
 tools/testing/selftests/bpf/test_sockmap.c         |  163 +++++++++++++++++---
 6 files changed, 296 insertions(+), 48 deletions(-)

--
Signature

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse
  2020-05-29 23:06 [bpf-next PATCH 0/3] fix ktls with sk_skb_verdict programs John Fastabend
@ 2020-05-29 23:06 ` John Fastabend
  2020-06-01 14:16   ` Jakub Sitnicki
                     ` (2 more replies)
  2020-05-29 23:06 ` [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls John Fastabend
  2020-05-29 23:07 ` [bpf-next PATCH 3/3] bpf, selftests: add test for ktls with skb bpf ingress policy John Fastabend
  2 siblings, 3 replies; 13+ messages in thread
From: John Fastabend @ 2020-05-29 23:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, bpf, john.fastabend

We will need this block of code called from tls context shortly, so
let's refactor the redirect logic so it's easy to use. This also
cleans up the switch stmt so we have fewer fallthrough cases.

No logic changes are intended.

Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 net/core/skmsg.c |   55 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index c479372..9d72f71 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -682,13 +682,43 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
 	return container_of(parser, struct sk_psock, parser);
 }
 
-static void sk_psock_verdict_apply(struct sk_psock *psock,
-				   struct sk_buff *skb, int verdict)
+static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
 	bool ingress;
 
+	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	if (unlikely(!sk_other)) {
+		kfree_skb(skb);
+		return;
+	}
+	psock_other = sk_psock(sk_other);
+	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	ingress = tcp_skb_bpf_ingress(skb);
+	if ((!ingress && sock_writeable(sk_other)) ||
+	    (ingress &&
+	     atomic_read(&sk_other->sk_rmem_alloc) <=
+	     sk_other->sk_rcvbuf)) {
+		if (!ingress)
+			skb_set_owner_w(skb, sk_other);
+		skb_queue_tail(&psock_other->ingress_skb, skb);
+		schedule_work(&psock_other->work);
+	} else {
+		kfree_skb(skb);
+	}
+}
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+				   struct sk_buff *skb, int verdict)
+{
+	struct sock *sk_other;
+
 	switch (verdict) {
 	case __SK_PASS:
 		sk_other = psock->sk;
@@ -707,25 +737,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		}
 		goto out_free;
 	case __SK_REDIRECT:
-		sk_other = tcp_skb_bpf_redirect_fetch(skb);
-		if (unlikely(!sk_other))
-			goto out_free;
-		psock_other = sk_psock(sk_other);
-		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
-		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
-			goto out_free;
-		ingress = tcp_skb_bpf_ingress(skb);
-		if ((!ingress && sock_writeable(sk_other)) ||
-		    (ingress &&
-		     atomic_read(&sk_other->sk_rmem_alloc) <=
-		     sk_other->sk_rcvbuf)) {
-			if (!ingress)
-				skb_set_owner_w(skb, sk_other);
-			skb_queue_tail(&psock_other->ingress_skb, skb);
-			schedule_work(&psock_other->work);
-			break;
-		}
-		/* fall-through */
+		sk_psock_skb_redirect(psock, skb);
+		break;
 	case __SK_DROP:
 		/* fall-through */
 	default:


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-05-29 23:06 [bpf-next PATCH 0/3] fix ktls with sk_skb_verdict programs John Fastabend
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
@ 2020-05-29 23:06 ` John Fastabend
  2020-06-01 14:57   ` Jakub Sitnicki
  2020-05-29 23:07 ` [bpf-next PATCH 3/3] bpf, selftests: add test for ktls with skb bpf ingress policy John Fastabend
  2 siblings, 1 reply; 13+ messages in thread
From: John Fastabend @ 2020-05-29 23:06 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, bpf, john.fastabend

KTLS uses a stream parser to collect TLS messages and send them to
the upper layer tls receive handler. This ensures the tls receiver
has a full TLS header to parse when it is run. However, when a
socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
is enabled we end up with two stream parsers running on the same
socket.

The result is both try to run on the same socket. First the KTLS
stream parser runs and calls read_sock(), which calls tcp_read_sock(),
which in turn calls tcp_rcv_skb(). This dequeues the skb from the
sk_receive_queue. When this is done the KTLS code then calls the
data_ready() callback which, because we stacked KTLS on top of the
bpf stream verdict program, has been replaced with
sk_psock_start_strp(). This will in turn kick the stream parser
again and eventually do the same thing KTLS did above, calling into
tcp_rcv_skb() and dequeuing a skb from the sk_receive_queue.

At this point the data stream is broken. Part of the stream was
handled by the KTLS side some other bytes may have been handled
by the BPF side. Generally this results in either missing data
or more likely a "Bad Message" complaint from the kTLS receive
handler as the BPF program steals some bytes meant to be in a
TLS header and/or the TLS header length is no longer correct.

We've already broken the idealized model where we can stack ULPs
in any order with generic callbacks on the TX side to handle this.
So in this patch we do the same thing but for RX side. We add
a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict
program is running and add a tls_sw_has_ctx_rx() helper so BPF
side can learn there is a TLS ULP on the socket.

Then on BPF side we omit calling our stream parser to avoid
breaking the data stream for the KTLS receiver. Then on the
KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
receiver is done with the packet but before it posts the
msg to userspace. This gives us symmetry between the TX and
RX halves and IMO makes it usable again. On the TX side we
process packets in this order BPF -> TLS -> TCP and on
the receive side in the reverse order TCP -> TLS -> BPF.

Discovered while testing OpenSSL 3.0 Alpha2.0 release.

Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/skmsg.h |    8 ++++++++
 include/net/tls.h     |    9 +++++++++
 net/core/skmsg.c      |   43 ++++++++++++++++++++++++++++++++++++++++---
 net/tls/tls_sw.c      |   20 ++++++++++++++++++--
 4 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index ad31c9f..08674cd 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
 	psock_set_prog(&progs->skb_verdict, NULL);
 }
 
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
+
+static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
+{
+	if (!psock)
+		return false;
+	return psock->parser.enabled;
+}
 #endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index bf9eb48..b74d59b 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -567,6 +567,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 	return !!tls_sw_ctx_tx(ctx);
 }
 
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (!ctx)
+		return false;
+	return !!tls_sw_ctx_rx(ctx);
+}
+
 void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
 void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9d72f71..351afbf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -7,6 +7,7 @@
 
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/tls.h>
 
 static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
 {
@@ -714,6 +715,38 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
 	}
 }
 
+static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
+				       struct sk_buff *skb, int verdict)
+{
+	switch (verdict) {
+	case __SK_REDIRECT:
+		sk_psock_skb_redirect(psock, skb);
+		break;
+	case __SK_PASS:
+	case __SK_DROP:
+	default:
+		break;
+	}
+}
+
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct bpf_prog *prog;
+	int ret = __SK_PASS;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	rcu_read_unlock();
+	sk_psock_tls_verdict_apply(psock, skb, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
+
 static void sk_psock_verdict_apply(struct sk_psock *psock,
 				   struct sk_buff *skb, int verdict)
 {
@@ -792,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (likely(psock)) {
-		write_lock_bh(&sk->sk_callback_lock);
-		strp_data_ready(&psock->parser.strp);
-		write_unlock_bh(&sk->sk_callback_lock);
+		if (tls_sw_has_ctx_rx(sk)) {
+			psock->parser.saved_data_ready(sk);
+		} else {
+			write_lock_bh(&sk->sk_callback_lock);
+			strp_data_ready(&psock->parser.strp);
+			write_unlock_bh(&sk->sk_callback_lock);
+		}
 	}
 	rcu_read_unlock();
 }
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 2d399b6..61043c6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1731,6 +1731,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	long timeo;
 	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool is_peek = flags & MSG_PEEK;
+	bool bpf_strp_enabled;
 	int num_async = 0;
 
 	flags |= nonblock;
@@ -1740,6 +1741,7 @@ int tls_sw_recvmsg(struct sock *sk,
 
 	psock = sk_psock_get(sk);
 	lock_sock(sk);
+	bpf_strp_enabled = sk_psock_strp_enabled(psock);
 
 	/* Process pending decrypted records. It must be non-zero-copy */
 	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
@@ -1793,11 +1795,12 @@ int tls_sw_recvmsg(struct sock *sk,
 
 		if (to_decrypt <= len && !is_kvec && !is_peek &&
 		    ctx->control == TLS_RECORD_TYPE_DATA &&
-		    prot->version != TLS_1_3_VERSION)
+		    prot->version != TLS_1_3_VERSION &&
+		    !sk_psock_strp_enabled(psock))
 			zc = true;
 
 		/* Do not use async mode if record is non-data */
-		if (ctx->control == TLS_RECORD_TYPE_DATA)
+		if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
 			async_capable = ctx->async_capable;
 		else
 			async_capable = false;
@@ -1847,6 +1850,19 @@ int tls_sw_recvmsg(struct sock *sk,
 			goto pick_next_record;
 
 		if (!zc) {
+			if (bpf_strp_enabled) {
+				err = sk_psock_tls_strp_read(psock, skb);
+				if (err != __SK_PASS) {
+					rxm->offset = rxm->offset + rxm->full_len;
+					rxm->full_len = 0;
+					if (err == __SK_DROP)
+						consume_skb(skb);
+					ctx->recv_pkt = NULL;
+					__strp_unpause(&ctx->strp);
+					continue;
+				}
+			}
+
 			if (rxm->full_len > len) {
 				retain_skb = true;
 				chunk = len;


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [bpf-next PATCH 3/3] bpf, selftests: add test for ktls with skb bpf ingress policy
  2020-05-29 23:06 [bpf-next PATCH 0/3] fix ktls with sk_skb_verdict programs John Fastabend
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
  2020-05-29 23:06 ` [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls John Fastabend
@ 2020-05-29 23:07 ` John Fastabend
  2 siblings, 0 replies; 13+ messages in thread
From: John Fastabend @ 2020-05-29 23:07 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: netdev, bpf, john.fastabend

This adds a test for bpf ingress policy. To ensure data writes happen
as expected with extra TLS headers we run these tests with data
verification enabled by default. This will test receive packets have
"PASS" stamped into the front of the payload.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 .../selftests/bpf/progs/test_sockmap_kern.h        |   46 ++++++
 tools/testing/selftests/bpf/test_sockmap.c         |  163 +++++++++++++++++---
 2 files changed, 187 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index a443d36..057036c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
+	__uint(max_entries, 2);
 	__type(key, int);
 	__type(value, int);
 } sock_skb_opts SEC(".maps");
 
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
 
 }
 
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+	const int one = 1;
+	int err, *f, ret = SK_PASS;
+	void *data_end;
+	char *c;
+
+	err = bpf_skb_pull_data(skb, 19);
+	if (err)
+		goto tls_out;
+
+	c = (char *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	if (c + 18 < data_end)
+		memcpy(&c[13], "PASS", 4);
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f) {
+		__u64 flags = 0;
+
+		ret = 0;
+		flags = *f;
+#ifdef SOCKMAP
+		return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+		return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+	}
+
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f)
+		ret = SK_DROP;
+tls_out:
+	return ret;
+}
+
 SEC("sockops")
 int bpf_sockmap(struct bpf_sock_ops *skops)
 {
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index c806438..37695fc 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,8 +63,8 @@ int s1, s2, c1, c2, p1, p2;
 int test_cnt;
 int passed;
 int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
 int prog_fd[11];
 
 int txmsg_pass;
@@ -79,7 +79,10 @@ int txmsg_end_push;
 int txmsg_start_pop;
 int txmsg_pop;
 int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
 int ktls;
 int peek_flag;
 
@@ -104,7 +107,7 @@ static const struct option long_options[] = {
 	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
 	{"txmsg_pop",	     required_argument,	NULL, 'x'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
-	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
 	{"whitelist", required_argument,	NULL, 'n' },
@@ -169,7 +172,8 @@ static void test_reset(void)
 	txmsg_start_push = txmsg_end_push = 0;
 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
 	txmsg_apply = txmsg_cork = 0;
-	txmsg_ingress = txmsg_skb = 0;
+	txmsg_ingress = txmsg_redir_skb = 0;
+	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
 }
 
 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
@@ -502,14 +506,41 @@ static int msg_alloc_iov(struct msghdr *msg,
 
 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
 {
-	int i, j, bytes_cnt = 0;
+	int i, j = 0, bytes_cnt = 0;
 	unsigned char k = 0;
 
 	for (i = 0; i < msg->msg_iovlen; i++) {
 		unsigned char *d = msg->msg_iov[i].iov_base;
 
-		for (j = 0;
-		     j < msg->msg_iov[i].iov_len && size; j++) {
+		/* Special case test for skb ingress + ktls */
+		if (i == 0 && txmsg_ktls_skb) {
+			if (msg->msg_iov[i].iov_len < 4)
+				return -EIO;
+			if (txmsg_ktls_skb_redir) {
+				if (memcmp(&d[13], "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+					return -EIO;
+				}
+				d[13] = 0;
+				d[14] = 1;
+				d[15] = 2;
+				d[16] = 3;
+				j = 13;
+			} else if (txmsg_ktls_skb) {
+				if (memcmp(d, "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+					return -EIO;
+				}
+				d[0] = 0;
+				d[1] = 1;
+				d[2] = 2;
+				d[3] = 3;
+			}
+		}
+
+		for (; j < msg->msg_iov[i].iov_len && size; j++) {
 			if (d[j] != k++) {
 				fprintf(stderr,
 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -724,7 +755,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 	rxpid = fork();
 	if (rxpid == 0) {
 		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
-		if (opt->drop_expected)
+		if (opt->drop_expected || txmsg_ktls_skb_drop)
 			_exit(0);
 
 		if (!iov_buf) /* zero bytes sent case */
@@ -911,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 		return err;
 	}
 
+	/* Attach programs to TLS sockmap */
+	if (txmsg_ktls_skb) {
+		err = bpf_prog_attach(prog_fd[0], map_fd[8],
+					BPF_SK_SKB_STREAM_PARSER, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+				prog_fd[0], map_fd[8], err, strerror(errno));
+			return err;
+		}
+
+		err = bpf_prog_attach(prog_fd[2], map_fd[8],
+				      BPF_SK_SKB_STREAM_VERDICT, 0);
+		if (err) {
+			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+				err, strerror(errno));
+			return err;
+		}
+	}
+
 	/* Attach to cgroups */
-	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
 	if (err) {
 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
 			err, strerror(errno));
@@ -928,15 +979,15 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 
 	/* Attach txmsg program to sockmap */
 	if (txmsg_pass)
-		tx_prog_fd = prog_fd[3];
-	else if (txmsg_redir)
 		tx_prog_fd = prog_fd[4];
-	else if (txmsg_apply)
+	else if (txmsg_redir)
 		tx_prog_fd = prog_fd[5];
-	else if (txmsg_cork)
+	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[6];
-	else if (txmsg_drop)
+	else if (txmsg_cork)
 		tx_prog_fd = prog_fd[7];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[8];
 	else
 		tx_prog_fd = 0;
 
@@ -1108,7 +1159,35 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 			}
 		}
 
-		if (txmsg_skb) {
+		if (txmsg_ktls_skb) {
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			if (txmsg_ktls_skb_redir) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7],
+							  &i, &ingress, BPF_ANY);
+				if (err) {
+					fprintf(stderr,
+						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+						err, strerror(errno));
+				}
+			}
+
+			if (txmsg_ktls_skb_drop) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+			}
+		}
+
+		if (txmsg_redir_skb) {
 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
 					p2 : p1;
 			int ingress = BPF_F_INGRESS;
@@ -1123,8 +1202,7 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 			}
 
 			i = 3;
-			err = bpf_map_update_elem(map_fd[0],
-						  &i, &skb_fd, BPF_ANY);
+			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
 			if (err) {
 				fprintf(stderr,
 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1158,9 +1236,12 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 		fprintf(stderr, "unknown test\n");
 out:
 	/* Detatch and zero all the maps */
-	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
 	if (tx_prog_fd >= 0)
 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
 
@@ -1229,8 +1310,10 @@ static void test_options(char *options)
 	}
 	if (txmsg_ingress)
 		strncat(options, "ingress,", OPTSTRING);
-	if (txmsg_skb)
-		strncat(options, "skb,", OPTSTRING);
+	if (txmsg_redir_skb)
+		strncat(options, "redir_skb,", OPTSTRING);
+	if (txmsg_ktls_skb)
+		strncat(options, "ktls_skb,", OPTSTRING);
 	if (ktls)
 		strncat(options, "ktls,", OPTSTRING);
 	if (peek_flag)
@@ -1362,6 +1445,40 @@ static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
 	test_send(opt, cgrp);
 }
 
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+	bool data = opt->data_test;
+	int k = ktls;
+
+	opt->data_test = true;
+	ktls = 1;
+
+	txmsg_pass = txmsg_drop = 0;
+	txmsg_ingress = txmsg_redir = 0;
+	txmsg_ktls_skb = 1;
+	txmsg_pass = 1;
+
+	/* Using data verification so ensure iov layout is
+	 * expected from test receiver side. e.g. has enough
+	 * bytes to write test code.
+	 */
+	opt->iov_length = 100;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 1;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 0;
+	txmsg_ktls_skb_redir = 1;
+	test_exec(cgrp, opt);
+
+	opt->data_test = data;
+	ktls = k;
+}
+
+
 /* Test cork with hung data. This tests poor usage patterns where
  * cork can leave data on the ring if user program is buggy and
  * doesn't flush them somehow. They do take some time however
@@ -1542,11 +1659,13 @@ char *map_names[] = {
 	"sock_bytes",
 	"sock_redir_flags",
 	"sock_skb_opts",
+	"tls_sock_map",
 };
 
 int prog_attach_type[] = {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_SOCK_OPS,
 	BPF_SK_MSG_VERDICT,
 	BPF_SK_MSG_VERDICT,
@@ -1560,6 +1679,7 @@ int prog_attach_type[] = {
 int prog_type[] = {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_SK_MSG,
@@ -1620,6 +1740,7 @@ struct _test test[] = {
 	{"txmsg test redirect", test_txmsg_redir},
 	{"txmsg test drop", test_txmsg_drop},
 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+	{"txmsg test skb", test_txmsg_skb},
 	{"txmsg test apply", test_txmsg_apply},
 	{"txmsg test cork", test_txmsg_cork},
 	{"txmsg test hanging corks", test_txmsg_cork_hangs},


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
@ 2020-06-01 14:16   ` Jakub Sitnicki
  2020-06-01 15:17     ` John Fastabend
  2020-06-01 20:48   ` Song Liu
  2020-06-06 16:26   ` Jakub Sitnicki
  2 siblings, 1 reply; 13+ messages in thread
From: Jakub Sitnicki @ 2020-06-01 14:16 UTC (permalink / raw)
  To: John Fastabend; +Cc: netdev, bpf, alexei.starovoitov, daniel

On Fri, 29 May 2020 16:06:41 -0700
John Fastabend <john.fastabend@gmail.com> wrote:

> We will need this block of code called from tls context shortly
> lets refactor the redirect logic so its easy to use. This also
> cleans up the switch stmt so we have fewer fallthrough cases.
> 
> No logic changes are intended.
> 
> Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---

Keeping out_free in the extracted helper might have been cleaner.

Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-05-29 23:06 ` [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls John Fastabend
@ 2020-06-01 14:57   ` Jakub Sitnicki
  2020-06-01 15:20     ` John Fastabend
  0 siblings, 1 reply; 13+ messages in thread
From: Jakub Sitnicki @ 2020-06-01 14:57 UTC (permalink / raw)
  To: John Fastabend; +Cc: netdev, bpf, alexei.starovoitov, daniel

On Fri, 29 May 2020 16:06:59 -0700
John Fastabend <john.fastabend@gmail.com> wrote:

> KTLS uses a stream parser to collect TLS messages and send them to
> the upper layer tls receive handler. This ensures the tls receiver
> has a full TLS header to parse when it is run. However, when a
> socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
> is enabled we end up with two stream parsers running on the same
> socket.
> 
> The result is both try to run on the same socket. First the KTLS
> stream parser runs and calls read_sock() which will tcp_read_sock
> which in turn calls tcp_rcv_skb(). This dequeues the skb from the
> sk_receive_queue. When this is done KTLS code then data_ready()
> callback which because we stacked KTLS on top of the bpf stream
> verdict program has been replaced with sk_psock_start_strp(). This
> will in turn kick the stream parser again and eventually do the
> same thing KTLS did above calling into tcp_rcv_skb() and dequeuing
> a skb from the sk_receive_queue.
> 
> At this point the data stream is broke. Part of the stream was
> handled by the KTLS side some other bytes may have been handled
> by the BPF side. Generally this results in either missing data
> or more likely a "Bad Message" complaint from the kTLS receive
> handler as the BPF program steals some bytes meant to be in a
> TLS header and/or the TLS header length is no longer correct.
> 
> We've already broke the idealized model where we can stack ULPs
> in any order with generic callbacks on the TX side to handle this.
> So in this patch we do the same thing but for RX side. We add
> a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict
> program is running and add a tls_sw_has_ctx_rx() helper so BPF
> side can learn there is a TLS ULP on the socket.
> 
> Then on BPF side we omit calling our stream parser to avoid
> breaking the data stream for the KTLS receiver. Then on the
> KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
> receiver is done with the packet but before it posts the
> msg to userspace. This gives us symmetry between the TX and
> RX halfs and IMO makes it usable again. On the TX side we
> process packets in this order BPF -> TLS -> TCP and on
> the receive side in the reverse order TCP -> TLS -> BPF.
> 
> Discovered while testing OpenSSL 3.0 Alpha2.0 release.
> 
> Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/skmsg.h |    8 ++++++++
>  include/net/tls.h     |    9 +++++++++
>  net/core/skmsg.c      |   43 ++++++++++++++++++++++++++++++++++++++++---
>  net/tls/tls_sw.c      |   20 ++++++++++++++++++--
>  4 files changed, 75 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index ad31c9f..08674cd 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
>  	psock_set_prog(&progs->skb_verdict, NULL);
>  }
>  
> +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
> +
> +static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
> +{
> +	if (!psock)
> +		return false;
> +	return psock->parser.enabled;
> +}
>  #endif /* _LINUX_SKMSG_H */
> diff --git a/include/net/tls.h b/include/net/tls.h
> index bf9eb48..b74d59b 100644
> --- a/include/net/tls.h
> +++ b/include/net/tls.h
> @@ -567,6 +567,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
>  	return !!tls_sw_ctx_tx(ctx);
>  }
>  
> +static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
> +{
> +	struct tls_context *ctx = tls_get_ctx(sk);
> +
> +	if (!ctx)
> +		return false;
> +	return !!tls_sw_ctx_rx(ctx);
> +}
> +
>  void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
>  void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
>  
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index 9d72f71..351afbf 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -7,6 +7,7 @@
>  
>  #include <net/sock.h>
>  #include <net/tcp.h>
> +#include <net/tls.h>
>  
>  static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
>  {
> @@ -714,6 +715,38 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
>  	}
>  }
>  
> +static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
> +				       struct sk_buff *skb, int verdict)
> +{
> +	switch (verdict) {
> +	case __SK_REDIRECT:
> +		sk_psock_skb_redirect(psock, skb);
> +		break;
> +	case __SK_PASS:
> +	case __SK_DROP:

The two cases above need a "fallthrough;", right?

> +	default:
> +		break;
> +	}
> +}
> +
> +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
> +{
> +	struct bpf_prog *prog;
> +	int ret = __SK_PASS;
> +
> +	rcu_read_lock();
> +	prog = READ_ONCE(psock->progs.skb_verdict);
> +	if (likely(prog)) {
> +		tcp_skb_bpf_redirect_clear(skb);
> +		ret = sk_psock_bpf_run(psock, prog, skb);
> +		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
> +	}
> +	rcu_read_unlock();
> +	sk_psock_tls_verdict_apply(psock, skb, ret);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
> +
>  static void sk_psock_verdict_apply(struct sk_psock *psock,
>  				   struct sk_buff *skb, int verdict)
>  {
> @@ -792,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
>  	rcu_read_lock();
>  	psock = sk_psock(sk);
>  	if (likely(psock)) {
> -		write_lock_bh(&sk->sk_callback_lock);
> -		strp_data_ready(&psock->parser.strp);
> -		write_unlock_bh(&sk->sk_callback_lock);
> +		if (tls_sw_has_ctx_rx(sk)) {
> +			psock->parser.saved_data_ready(sk);
> +		} else {
> +			write_lock_bh(&sk->sk_callback_lock);
> +			strp_data_ready(&psock->parser.strp);
> +			write_unlock_bh(&sk->sk_callback_lock);
> +		}
>  	}
>  	rcu_read_unlock();
>  }
> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> index 2d399b6..61043c6 100644
> --- a/net/tls/tls_sw.c
> +++ b/net/tls/tls_sw.c
> @@ -1731,6 +1731,7 @@ int tls_sw_recvmsg(struct sock *sk,
>  	long timeo;
>  	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
>  	bool is_peek = flags & MSG_PEEK;
> +	bool bpf_strp_enabled;
>  	int num_async = 0;
>  
>  	flags |= nonblock;
> @@ -1740,6 +1741,7 @@ int tls_sw_recvmsg(struct sock *sk,
>  
>  	psock = sk_psock_get(sk);
>  	lock_sock(sk);
> +	bpf_strp_enabled = sk_psock_strp_enabled(psock);
>  
>  	/* Process pending decrypted records. It must be non-zero-copy */
>  	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
> @@ -1793,11 +1795,12 @@ int tls_sw_recvmsg(struct sock *sk,
>  
>  		if (to_decrypt <= len && !is_kvec && !is_peek &&
>  		    ctx->control == TLS_RECORD_TYPE_DATA &&
> -		    prot->version != TLS_1_3_VERSION)
> +		    prot->version != TLS_1_3_VERSION &&
> +		    !sk_psock_strp_enabled(psock))

Is this recheck of parser state intentional? Or can we test for
"!bpf_strp_enabled" here also?

>  			zc = true;
>  
>  		/* Do not use async mode if record is non-data */
> -		if (ctx->control == TLS_RECORD_TYPE_DATA)
> +		if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
>  			async_capable = ctx->async_capable;
>  		else
>  			async_capable = false;
> @@ -1847,6 +1850,19 @@ int tls_sw_recvmsg(struct sock *sk,
>  			goto pick_next_record;
>  
>  		if (!zc) {
> +			if (bpf_strp_enabled) {
> +				err = sk_psock_tls_strp_read(psock, skb);
> +				if (err != __SK_PASS) {
> +					rxm->offset = rxm->offset + rxm->full_len;
> +					rxm->full_len = 0;
> +					if (err == __SK_DROP)
> +						consume_skb(skb);
> +					ctx->recv_pkt = NULL;
> +					__strp_unpause(&ctx->strp);
> +					continue;
> +				}
> +			}
> +
>  			if (rxm->full_len > len) {
>  				retain_skb = true;
>  				chunk = len;
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse
  2020-06-01 14:16   ` Jakub Sitnicki
@ 2020-06-01 15:17     ` John Fastabend
  0 siblings, 0 replies; 13+ messages in thread
From: John Fastabend @ 2020-06-01 15:17 UTC (permalink / raw)
  To: Jakub Sitnicki, John Fastabend; +Cc: netdev, bpf, alexei.starovoitov, daniel

Jakub Sitnicki wrote:
> On Fri, 29 May 2020 16:06:41 -0700
> John Fastabend <john.fastabend@gmail.com> wrote:
> 
> > We will need this block of code called from tls context shortly
> > lets refactor the redirect logic so its easy to use. This also
> > cleans up the switch stmt so we have fewer fallthrough cases.
> > 
> > No logic changes are intended.
> > 
> > Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> > ---
> 
> Keeping out_free in the extracted helper might have been cleaner.

Perhaps. I had the out_free there at first then went back and did it
the way its done here after writing 2/3.

Thanks for the review!

> 
> Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-06-01 14:57   ` Jakub Sitnicki
@ 2020-06-01 15:20     ` John Fastabend
  2020-06-01 15:50       ` John Fastabend
  2020-06-01 21:23       ` Alexei Starovoitov
  0 siblings, 2 replies; 13+ messages in thread
From: John Fastabend @ 2020-06-01 15:20 UTC (permalink / raw)
  To: Jakub Sitnicki, John Fastabend; +Cc: netdev, bpf, alexei.starovoitov, daniel

Jakub Sitnicki wrote:
> On Fri, 29 May 2020 16:06:59 -0700
> John Fastabend <john.fastabend@gmail.com> wrote:
> 
> > KTLS uses a stream parser to collect TLS messages and send them to
> > the upper layer tls receive handler. This ensures the tls receiver
> > has a full TLS header to parse when it is run. However, when a
> > socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
> > is enabled we end up with two stream parsers running on the same
> > socket.
> > 
> > The result is that both try to run on the same socket. First the KTLS
> > stream parser runs and calls read_sock(), which calls tcp_read_sock(),
> > which in turn calls tcp_rcv_skb(). This dequeues the skb from the
> > sk_receive_queue. When this is done, the KTLS code then calls the
> > data_ready() callback which, because we stacked KTLS on top of the bpf
> > stream verdict program, has been replaced with sk_psock_start_strp().
> > This will in turn kick the stream parser again and eventually do the
> > same thing KTLS did above, calling into tcp_rcv_skb() and dequeuing
> > a skb from the sk_receive_queue.
> > 
> > At this point the data stream is broken. Part of the stream was
> > handled by the KTLS side some other bytes may have been handled
> > by the BPF side. Generally this results in either missing data
> > or more likely a "Bad Message" complaint from the kTLS receive
> > handler as the BPF program steals some bytes meant to be in a
> > TLS header and/or the TLS header length is no longer correct.
> > 
> > We've already broken the idealized model where we can stack ULPs
> > in any order with generic callbacks on the TX side to handle this.
> > So in this patch we do the same thing but for RX side. We add
> > a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict
> > program is running and add a tls_sw_has_ctx_rx() helper so BPF
> > side can learn there is a TLS ULP on the socket.
> > 
> > Then on BPF side we omit calling our stream parser to avoid
> > breaking the data stream for the KTLS receiver. Then on the
> > KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
> > receiver is done with the packet but before it posts the
> > msg to userspace. This gives us symmetry between the TX and
> > RX halves and IMO makes it usable again. On the TX side we
> > process packets in this order BPF -> TLS -> TCP and on
> > the receive side in the reverse order TCP -> TLS -> BPF.
> > 
> > Discovered while testing OpenSSL 3.0 Alpha2.0 release.
> > 
> > Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> > ---
> >  include/linux/skmsg.h |    8 ++++++++
> >  include/net/tls.h     |    9 +++++++++
> >  net/core/skmsg.c      |   43 ++++++++++++++++++++++++++++++++++++++++---
> >  net/tls/tls_sw.c      |   20 ++++++++++++++++++--
> >  4 files changed, 75 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> > index ad31c9f..08674cd 100644
> > --- a/include/linux/skmsg.h
> > +++ b/include/linux/skmsg.h
> > @@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
> >  	psock_set_prog(&progs->skb_verdict, NULL);
> >  }
> >  
> > +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
> > +
> > +static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
> > +{
> > +	if (!psock)
> > +		return false;
> > +	return psock->parser.enabled;
> > +}
> >  #endif /* _LINUX_SKMSG_H */
> > diff --git a/include/net/tls.h b/include/net/tls.h
> > index bf9eb48..b74d59b 100644
> > --- a/include/net/tls.h
> > +++ b/include/net/tls.h
> > @@ -567,6 +567,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
> >  	return !!tls_sw_ctx_tx(ctx);
> >  }
> >  
> > +static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
> > +{
> > +	struct tls_context *ctx = tls_get_ctx(sk);
> > +
> > +	if (!ctx)
> > +		return false;
> > +	return !!tls_sw_ctx_rx(ctx);
> > +}
> > +
> >  void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
> >  void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
> >  
> > diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> > index 9d72f71..351afbf 100644
> > --- a/net/core/skmsg.c
> > +++ b/net/core/skmsg.c
> > @@ -7,6 +7,7 @@
> >  
> >  #include <net/sock.h>
> >  #include <net/tcp.h>
> > +#include <net/tls.h>
> >  
> >  static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
> >  {
> > @@ -714,6 +715,38 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
> >  	}
> >  }
> >  
> > +static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
> > +				       struct sk_buff *skb, int verdict)
> > +{
> > +	switch (verdict) {
> > +	case __SK_REDIRECT:
> > +		sk_psock_skb_redirect(psock, skb);
> > +		break;
> > +	case __SK_PASS:
> > +	case __SK_DROP:
> 
> The two cases above need a "fallthrough;", right?

Correct otherwise will get the "fallthrough" patch shortly after this
lands. Thanks I'll add it.

> 
> > +	default:
> > +		break;
> > +	}
> > +}

[...]

> > diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> > index 2d399b6..61043c6 100644
> > --- a/net/tls/tls_sw.c
> > +++ b/net/tls/tls_sw.c
> > @@ -1731,6 +1731,7 @@ int tls_sw_recvmsg(struct sock *sk,
> >  	long timeo;
> >  	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
> >  	bool is_peek = flags & MSG_PEEK;
> > +	bool bpf_strp_enabled;
> >  	int num_async = 0;
> >  
> >  	flags |= nonblock;
> > @@ -1740,6 +1741,7 @@ int tls_sw_recvmsg(struct sock *sk,
> >  
> >  	psock = sk_psock_get(sk);
> >  	lock_sock(sk);
> > +	bpf_strp_enabled = sk_psock_strp_enabled(psock);
> >  
> >  	/* Process pending decrypted records. It must be non-zero-copy */
> >  	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
> > @@ -1793,11 +1795,12 @@ int tls_sw_recvmsg(struct sock *sk,
> >  
> >  		if (to_decrypt <= len && !is_kvec && !is_peek &&
> >  		    ctx->control == TLS_RECORD_TYPE_DATA &&
> > -		    prot->version != TLS_1_3_VERSION)
> > +		    prot->version != TLS_1_3_VERSION &&
> > +		    !sk_psock_strp_enabled(psock))
> 
> Is this recheck of parser state intentional? Or can we test for
> "!bpf_strp_enabled" here also?

Yes I'll fix it up to use bpf_strp_enabled. Thanks

> 
> >  			zc = true;
> >  
> >  		/* Do not use async mode if record is non-data */
> > -		if (ctx->control == TLS_RECORD_TYPE_DATA)
> > +		if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
> >  			async_capable = ctx->async_capable;
> >  		else
> >  			async_capable = false;
> > @@ -1847,6 +1850,19 @@ int tls_sw_recvmsg(struct sock *sk,
> >  			goto pick_next_record;
> >  
> >  		if (!zc) {
> > +			if (bpf_strp_enabled) {
> > +				err = sk_psock_tls_strp_read(psock, skb);
> > +				if (err != __SK_PASS) {
> > +					rxm->offset = rxm->offset + rxm->full_len;
> > +					rxm->full_len = 0;
> > +					if (err == __SK_DROP)
> > +						consume_skb(skb);
> > +					ctx->recv_pkt = NULL;
> > +					__strp_unpause(&ctx->strp);
> > +					continue;
> > +				}
> > +			}
> > +
> >  			if (rxm->full_len > len) {
> >  				retain_skb = true;
> >  				chunk = len;
> > 
> 



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-06-01 15:20     ` John Fastabend
@ 2020-06-01 15:50       ` John Fastabend
  2020-06-02  8:55         ` Jakub Sitnicki
  2020-06-01 21:23       ` Alexei Starovoitov
  1 sibling, 1 reply; 13+ messages in thread
From: John Fastabend @ 2020-06-01 15:50 UTC (permalink / raw)
  To: John Fastabend, Jakub Sitnicki, John Fastabend
  Cc: netdev, bpf, alexei.starovoitov, daniel

John Fastabend wrote:
> Jakub Sitnicki wrote:
> > On Fri, 29 May 2020 16:06:59 -0700
> > John Fastabend <john.fastabend@gmail.com> wrote:
> > 
> > > KTLS uses a stream parser to collect TLS messages and send them to
> > > the upper layer tls receive handler. This ensures the tls receiver
> > > has a full TLS header to parse when it is run. However, when a
> > > socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
> > > is enabled we end up with two stream parsers running on the same
> > > socket.
> > > 
> > > The result is both try to run on the same socket. First the KTLS
> > > stream parser runs and calls read_sock() which will tcp_read_sock
> > > which in turn calls tcp_rcv_skb(). This dequeues the skb from the
> > > sk_receive_queue. When this is done KTLS code then data_ready()
> > > callback which because we stacked KTLS on top of the bpf stream
> > > verdict program has been replaced with sk_psock_start_strp(). This
> > > will in turn kick the stream parser again and eventually do the
> > > same thing KTLS did above calling into tcp_rcv_skb() and dequeuing
> > > a skb from the sk_receive_queue.
> > > 
> > > At this point the data stream is broke. Part of the stream was
> > > handled by the KTLS side some other bytes may have been handled
> > > by the BPF side. Generally this results in either missing data
> > > or more likely a "Bad Message" complaint from the kTLS receive
> > > handler as the BPF program steals some bytes meant to be in a
> > > TLS header and/or the TLS header length is no longer correct.
> > > 
> > > We've already broke the idealized model where we can stack ULPs
> > > in any order with generic callbacks on the TX side to handle this.
> > > So in this patch we do the same thing but for RX side. We add
> > > a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict
> > > program is running and add a tls_sw_has_ctx_rx() helper so BPF
> > > side can learn there is a TLS ULP on the socket.
> > > 
> > > Then on BPF side we omit calling our stream parser to avoid
> > > breaking the data stream for the KTLS receiver. Then on the
> > > KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
> > > receiver is done with the packet but before it posts the
> > > msg to userspace. This gives us symmetry between the TX and
> > > RX halfs and IMO makes it usable again. On the TX side we
> > > process packets in this order BPF -> TLS -> TCP and on
> > > the receive side in the reverse order TCP -> TLS -> BPF.
> > > 
> > > Discovered while testing OpenSSL 3.0 Alpha2.0 release.
> > > 
> > > Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> > > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> > > ---

[...]

> > > +static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
> > > +				       struct sk_buff *skb, int verdict)
> > > +{
> > > +	switch (verdict) {
> > > +	case __SK_REDIRECT:
> > > +		sk_psock_skb_redirect(psock, skb);
> > > +		break;
> > > +	case __SK_PASS:
> > > +	case __SK_DROP:
> > 
> > The two cases above need a "fallthrough;", right?
> 
> Correct otherwise will get the "fallthrough" patch shortly after this
> lands. Thanks I'll add it.
> 

hmm actually I don't think we need 'fallthrough;' here when the
case doesn't have statements,

 switch (a) {
 case 1:
 case 2:
 default:
     break;
 }

seems OK to me. I don't have a preference though so feel free to
correct me.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
  2020-06-01 14:16   ` Jakub Sitnicki
@ 2020-06-01 20:48   ` Song Liu
  2020-06-06 16:26   ` Jakub Sitnicki
  2 siblings, 0 replies; 13+ messages in thread
From: Song Liu @ 2020-06-01 20:48 UTC (permalink / raw)
  To: John Fastabend; +Cc: Alexei Starovoitov, Daniel Borkmann, Networking, bpf

On Fri, May 29, 2020 at 4:07 PM John Fastabend <john.fastabend@gmail.com> wrote:
>
> We will need this block of code called from tls context shortly
> lets refactor the redirect logic so its easy to use. This also
> cleans up the switch stmt so we have fewer fallthrough cases.
>
> No logic changes are intended.
>
> Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-06-01 15:20     ` John Fastabend
  2020-06-01 15:50       ` John Fastabend
@ 2020-06-01 21:23       ` Alexei Starovoitov
  1 sibling, 0 replies; 13+ messages in thread
From: Alexei Starovoitov @ 2020-06-01 21:23 UTC (permalink / raw)
  To: John Fastabend; +Cc: Jakub Sitnicki, Network Development, bpf, Daniel Borkmann

On Mon, Jun 1, 2020 at 8:20 AM John Fastabend <john.fastabend@gmail.com> wrote:
> > > @@ -1793,11 +1795,12 @@ int tls_sw_recvmsg(struct sock *sk,
> > >
> > >             if (to_decrypt <= len && !is_kvec && !is_peek &&
> > >                 ctx->control == TLS_RECORD_TYPE_DATA &&
> > > -               prot->version != TLS_1_3_VERSION)
> > > +               prot->version != TLS_1_3_VERSION &&
> > > +               !sk_psock_strp_enabled(psock))
> >
> > Is this recheck of parser state intentional? Or can we test for
> > "!bpf_strp_enabled" here also?
>
> Yes I'll fix it up to use bpf_strp_enabled. Thanks

I fixed that bit and applied the set.

Thanks!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls
  2020-06-01 15:50       ` John Fastabend
@ 2020-06-02  8:55         ` Jakub Sitnicki
  0 siblings, 0 replies; 13+ messages in thread
From: Jakub Sitnicki @ 2020-06-02  8:55 UTC (permalink / raw)
  To: John Fastabend; +Cc: netdev, bpf, alexei.starovoitov, daniel

On Mon, Jun 01, 2020 at 05:50 PM CEST, John Fastabend wrote:
> John Fastabend wrote:
>> Jakub Sitnicki wrote:
>> > On Fri, 29 May 2020 16:06:59 -0700
>> > John Fastabend <john.fastabend@gmail.com> wrote:
>> >
>> > > KTLS uses a stream parser to collect TLS messages and send them to
>> > > the upper layer tls receive handler. This ensures the tls receiver
>> > > has a full TLS header to parse when it is run. However, when a
>> > > socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
>> > > is enabled we end up with two stream parsers running on the same
>> > > socket.
>> > >
>> > > The result is both try to run on the same socket. First the KTLS
>> > > stream parser runs and calls read_sock() which will tcp_read_sock
>> > > which in turn calls tcp_rcv_skb(). This dequeues the skb from the
>> > > sk_receive_queue. When this is done KTLS code then data_ready()
>> > > callback which because we stacked KTLS on top of the bpf stream
>> > > verdict program has been replaced with sk_psock_start_strp(). This
>> > > will in turn kick the stream parser again and eventually do the
>> > > same thing KTLS did above calling into tcp_rcv_skb() and dequeuing
>> > > a skb from the sk_receive_queue.
>> > >
>> > > At this point the data stream is broke. Part of the stream was
>> > > handled by the KTLS side some other bytes may have been handled
>> > > by the BPF side. Generally this results in either missing data
>> > > or more likely a "Bad Message" complaint from the kTLS receive
>> > > handler as the BPF program steals some bytes meant to be in a
>> > > TLS header and/or the TLS header length is no longer correct.
>> > >
>> > > We've already broke the idealized model where we can stack ULPs
>> > > in any order with generic callbacks on the TX side to handle this.
>> > > So in this patch we do the same thing but for RX side. We add
>> > > a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict
>> > > program is running and add a tls_sw_has_ctx_rx() helper so BPF
>> > > side can learn there is a TLS ULP on the socket.
>> > >
>> > > Then on BPF side we omit calling our stream parser to avoid
>> > > breaking the data stream for the KTLS receiver. Then on the
>> > > KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
>> > > receiver is done with the packet but before it posts the
>> > > msg to userspace. This gives us symmetry between the TX and
>> > > RX halfs and IMO makes it usable again. On the TX side we
>> > > process packets in this order BPF -> TLS -> TCP and on
>> > > the receive side in the reverse order TCP -> TLS -> BPF.
>> > >
>> > > Discovered while testing OpenSSL 3.0 Alpha2.0 release.
>> > >
>> > > Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
>> > > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> > > ---
>
> [...]
>
>> > > +static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
>> > > +				       struct sk_buff *skb, int verdict)
>> > > +{
>> > > +	switch (verdict) {
>> > > +	case __SK_REDIRECT:
>> > > +		sk_psock_skb_redirect(psock, skb);
>> > > +		break;
>> > > +	case __SK_PASS:
>> > > +	case __SK_DROP:
>> >
>> > The two cases above need a "fallthrough;", right?
>>
>> Correct otherwise will get the "fallthrough" patch shortly after this
>> lands. Thanks I'll add it.
>>
>
> hmm actually I don't think we need 'fallthrough;' here when the
> case doesn't have statements,
>
>  switch (a) {
>  case 1:
>  case 2:
>  default:
>      break;
>  }
>
> seems OK to me. I don't have a preference though so feel free to
> correct me.

I misunderstood guidance in [0]. You're right, it seems too verbose to
annotate cases without statements. Didn't mean to nit-pick :-)

[0] https://www.kernel.org/doc/html/latest/process/deprecated.html#implicit-switch-case-fall-through

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse
  2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
  2020-06-01 14:16   ` Jakub Sitnicki
  2020-06-01 20:48   ` Song Liu
@ 2020-06-06 16:26   ` Jakub Sitnicki
  2 siblings, 0 replies; 13+ messages in thread
From: Jakub Sitnicki @ 2020-06-06 16:26 UTC (permalink / raw)
  To: John Fastabend; +Cc: netdev, bpf

On Fri, 29 May 2020 23:06:41 +0000
John Fastabend <john.fastabend@gmail.com> wrote:

> We will need this block of code called from tls context shortly
> lets refactor the redirect logic so its easy to use. This also
> cleans up the switch stmt so we have fewer fallthrough cases.
> 
> No logic changes are intended.
> 
> Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  net/core/skmsg.c |   55 +++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 34 insertions(+), 21 deletions(-)
> 
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index c479372..9d72f71 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -682,13 +682,43 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
>  	return container_of(parser, struct sk_psock, parser);
>  }
>  
> -static void sk_psock_verdict_apply(struct sk_psock *psock,
> -				   struct sk_buff *skb, int verdict)
> +static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
>  {
>  	struct sk_psock *psock_other;
>  	struct sock *sk_other;
>  	bool ingress;
>  
> +	sk_other = tcp_skb_bpf_redirect_fetch(skb);
> +	if (unlikely(!sk_other)) {
> +		kfree_skb(skb);
> +		return;
> +	}
> +	psock_other = sk_psock(sk_other);

I think we're not in RCU read-side critical section and so lockdep-RCU
[0] is complaining about accessing sk_user_data with rcu_dereference:

bash-5.0# ./test_sockmap
# 1/ 6  sockmap::txmsg test passthrough:OK
# 2/ 6  sockmap::txmsg test redirect:OK
# 3/ 6  sockmap::txmsg test drop:OK
# 4/ 6  sockmap::txmsg test ingress redirect:OK
[   96.791996]
[   96.792211] =============================
[   96.792763] WARNING: suspicious RCU usage
[   96.793297] 5.7.0-rc7-02964-g615b5749876a-dirty #692 Not tainted
[   96.794032] -----------------------------
[   96.794480] include/linux/skmsg.h:284 suspicious rcu_dereference_check() usage!
[   96.795154]
[   96.795154] other info that might help us debug this:
[   96.795154]
[   96.795926]
[   96.795926] rcu_scheduler_active = 2, debug_locks = 1
[   96.796556] 1 lock held by test_sockmap/1060:
[   96.796970]  #0: ffff8880a0d35f20 (sk_lock-AF_INET){+.+.}-{0:0}, at: tls_sw_recvmsg+0x238/0xc10
[   96.797813]
[   96.797813] stack backtrace:
[   96.798224] CPU: 1 PID: 1060 Comm: test_sockmap Not tainted 5.7.0-rc7-02964-g615b5749876a-dirty #692
[   96.799071] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
[   96.800294] Call Trace:
[   96.800543]  dump_stack+0x97/0xe0
[   96.800864]  sk_psock_skb_redirect.isra.0+0xa6/0x1b0
[   96.801338]  sk_psock_tls_strp_read+0x298/0x310
[   96.801769]  tls_sw_recvmsg+0xa47/0xc10
[   96.802144]  ? decrypt_skb+0x80/0x80
[   96.802491]  ? lock_downgrade+0x330/0x330
[   96.802887]  inet_recvmsg+0xae/0x2a0
[   96.803224]  ? rw_copy_check_uvector+0x15e/0x1b0
[   96.803669]  ? inet_sendpage+0xc0/0xc0
[   96.804029]  ____sys_recvmsg+0x110/0x210
[   96.804417]  ? kernel_recvmsg+0x60/0x60
[   96.804785]  ? copy_msghdr_from_user+0x91/0xd0
[   96.805200]  ? __copy_msghdr_from_user+0x230/0x230
[   96.805665]  ? lock_acquire+0x120/0x4b0
[   96.806025]  ? match_held_lock+0x1b/0x230
[   96.806417]  ___sys_recvmsg+0xb8/0x100
[   96.806778]  ? ___sys_sendmsg+0x110/0x110
[   96.807155]  ? lock_downgrade+0x330/0x330
[   96.807555]  ? __fget_light+0xad/0x110
[   96.807917]  ? sockfd_lookup_light+0x91/0xb0
[   96.808334]  __sys_recvmsg+0x87/0xe0
[   96.808674]  ? __sys_recvmsg_sock+0x70/0x70
[   96.809064]  ? rcu_read_lock_sched_held+0x81/0xb0
[   96.809540]  ? switch_fpu_return+0x1/0x250
[   96.809948]  ? do_syscall_64+0x5f/0x9a0
[   96.810325]  do_syscall_64+0xad/0x9a0
[   96.810676]  ? handle_mm_fault+0x21e/0x3d0
[   96.811060]  ? syscall_return_slowpath+0x530/0x530
[   96.811524]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[   96.811955]  ? lockdep_hardirqs_off+0xb5/0x100
[   96.812383]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[   96.812871]  entry_SYSCALL_64_after_hwframe+0x49/0xb3
[   96.813425] RIP: 0033:0x7f92ff15c737
[   96.813762] Code: 02 b8 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2f 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
[   96.815477] RSP: 002b:00007ffd80734b68 EFLAGS: 00000246 ORIG_RAX: 000000000000002f
[   96.816177] RAX: ffffffffffffffda RBX: 000000000000001c RCX: 00007f92ff15c737
[   96.816847] RDX: 0000000000004000 RSI: 00007ffd80734bd0 RDI: 000000000000001c
[   96.817602] RBP: 00007ffd80734c50 R08: 00007ffd80734bc0 R09: 0000000000000060
[   96.818351] R10: fffffffffffffb15 R11: 0000000000000246 R12: 0000000000000000
[   96.819107] R13: 0000000000004000 R14: 00007f92fef7a6b8 R15: 00007ffd80734d50

[0] https://www.kernel.org/doc/Documentation/RCU/lockdep-splat.txt

> +	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
> +	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
> +		kfree_skb(skb);
> +		return;
> +	}
> +
> +	ingress = tcp_skb_bpf_ingress(skb);
> +	if ((!ingress && sock_writeable(sk_other)) ||
> +	    (ingress &&
> +	     atomic_read(&sk_other->sk_rmem_alloc) <=
> +	     sk_other->sk_rcvbuf)) {
> +		if (!ingress)
> +			skb_set_owner_w(skb, sk_other);
> +		skb_queue_tail(&psock_other->ingress_skb, skb);
> +		schedule_work(&psock_other->work);
> +	} else {
> +		kfree_skb(skb);
> +	}
> +}
> +
> +static void sk_psock_verdict_apply(struct sk_psock *psock,
> +				   struct sk_buff *skb, int verdict)
> +{
> +	struct sock *sk_other;
> +
>  	switch (verdict) {
>  	case __SK_PASS:
>  		sk_other = psock->sk;
> @@ -707,25 +737,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
>  		}
>  		goto out_free;
>  	case __SK_REDIRECT:
> -		sk_other = tcp_skb_bpf_redirect_fetch(skb);
> -		if (unlikely(!sk_other))
> -			goto out_free;
> -		psock_other = sk_psock(sk_other);
> -		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
> -		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
> -			goto out_free;
> -		ingress = tcp_skb_bpf_ingress(skb);
> -		if ((!ingress && sock_writeable(sk_other)) ||
> -		    (ingress &&
> -		     atomic_read(&sk_other->sk_rmem_alloc) <=
> -		     sk_other->sk_rcvbuf)) {
> -			if (!ingress)
> -				skb_set_owner_w(skb, sk_other);
> -			skb_queue_tail(&psock_other->ingress_skb, skb);
> -			schedule_work(&psock_other->work);
> -			break;
> -		}
> -		/* fall-through */
> +		sk_psock_skb_redirect(psock, skb);
> +		break;
>  	case __SK_DROP:
>  		/* fall-through */
>  	default:
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-06-06 16:27 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-29 23:06 [bpf-next PATCH 0/3] fix ktls with sk_skb_verdict programs John Fastabend
2020-05-29 23:06 ` [bpf-next PATCH 1/3] bpf: refactor sockmap redirect code so its easy to reuse John Fastabend
2020-06-01 14:16   ` Jakub Sitnicki
2020-06-01 15:17     ` John Fastabend
2020-06-01 20:48   ` Song Liu
2020-06-06 16:26   ` Jakub Sitnicki
2020-05-29 23:06 ` [bpf-next PATCH 2/3] bpf: fix running sk_skb program types with ktls John Fastabend
2020-06-01 14:57   ` Jakub Sitnicki
2020-06-01 15:20     ` John Fastabend
2020-06-01 15:50       ` John Fastabend
2020-06-02  8:55         ` Jakub Sitnicki
2020-06-01 21:23       ` Alexei Starovoitov
2020-05-29 23:07 ` [bpf-next PATCH 3/3] bpf, selftests: add test for ktls with skb bpf ingress policy John Fastabend

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).