bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress
@ 2021-09-17  1:32 Liu Jian
  2021-09-17 22:34 ` John Fastabend
  2021-09-20  1:14 ` Cong Wang
  0 siblings, 2 replies; 4+ messages in thread
From: Liu Jian @ 2021-09-17  1:32 UTC (permalink / raw)
  To: john.fastabend, daniel, jakub, lmb, davem, kuba, netdev, bpf; +Cc: liujian56

If sockmap enable strparser, there are lose offset info in
sk_psock_skb_ingress. If the length determined by parse_msg function
is not skb->len, the skb will be converted to sk_msg multiple times,
and userspace app will get the data multiple times.

Fix this by get the offset and length from strp_msg.

Signed-off-by: Liu Jian <liujian56@huawei.com>
---
v1->v2: fix build error when disable CONFIG_BPF_STREAM_PARSER

 net/core/skmsg.c | 51 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..c958e3642cb3 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -9,6 +9,10 @@
 #include <net/tcp.h>
 #include <net/tls.h>
 
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+static void sk_psock_strp_data_ready(struct sock *sk);
+#endif
+
 static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
 {
 	if (msg->sg.end > msg->sg.start &&
@@ -494,6 +498,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					u32 off, u32 len,
 					struct sk_psock *psock,
 					struct sock *sk,
 					struct sk_msg *msg)
@@ -507,11 +512,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	 */
 	if (skb_linearize(skb))
 		return -EAGAIN;
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
 	if (unlikely(num_sge < 0))
 		return num_sge;
 
-	copied = skb->len;
+	copied = len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
 	msg->sg.end = num_sge;
@@ -522,9 +527,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+				u32 off, u32 len)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
@@ -535,7 +542,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * correctly.
 	 */
 	if (unlikely(skb->sk == sk))
-		return sk_psock_skb_ingress_self(psock, skb);
+		return sk_psock_skb_ingress_self(psock, skb, off, len);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;
@@ -547,7 +554,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * into user buffers.
 	 */
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -557,7 +564,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
  * skb. In this case we do not need to check memory limits or skb_set_owner_r
  * because the skb is already accounted for here.
  */
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len)
 {
 	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	struct sock *sk = psock->sk;
@@ -567,7 +575,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 		return -EAGAIN;
 	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -581,7 +589,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			return -EAGAIN;
 		return skb_send_sock(psock->sk, skb, off, len);
 	}
-	return sk_psock_skb_ingress(psock, skb);
+	return sk_psock_skb_ingress(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -604,6 +612,9 @@ static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
 	struct sk_buff *skb = NULL;
 	bool ingress;
 	u32 len, off;
@@ -624,6 +635,13 @@ static void sk_psock_backlog(struct work_struct *work)
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+		if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
+			stm = strp_msg(skb);
+			off = stm->offset;
+			len = stm->full_len;
+		}
+#endif
 start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
@@ -930,6 +948,10 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 {
 	struct sock *sk_other;
 	int err = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
+	u32 len, off;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -949,7 +971,16 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress_self(psock, skb);
+			len = skb->len;
+			off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+			if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
+				stm = strp_msg(skb);
+				off = stm->offset;
+				len = stm->full_len;
+			}
+#endif
+			err = sk_psock_skb_ingress_self(psock, skb, off, len);
 		}
 		if (err < 0) {
 			spin_lock_bh(&psock->ingress_lock);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress
  2021-09-17  1:32 [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress Liu Jian
@ 2021-09-17 22:34 ` John Fastabend
  2021-09-20  1:14 ` Cong Wang
  1 sibling, 0 replies; 4+ messages in thread
From: John Fastabend @ 2021-09-17 22:34 UTC (permalink / raw)
  To: Liu Jian, john.fastabend, daniel, jakub, lmb, davem, kuba, netdev, bpf
  Cc: liujian56

Liu Jian wrote:
> If sockmap enable strparser, there are lose offset info in
> sk_psock_skb_ingress. If the length determined by parse_msg function
> is not skb->len, the skb will be converted to sk_msg multiple times,
> and userspace app will get the data multiple times.
> 
> Fix this by get the offset and length from strp_msg.
> 
> Signed-off-by: Liu Jian <liujian56@huawei.com>
> ---
> v1->v2: fix build error when disable CONFIG_BPF_STREAM_PARSER
> 

LGTM thanks.

Acked-by: John Fastabend <john.fastabend@gmail.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress
  2021-09-17  1:32 [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress Liu Jian
  2021-09-17 22:34 ` John Fastabend
@ 2021-09-20  1:14 ` Cong Wang
  2021-09-22  2:21   ` liujian (CE)
  1 sibling, 1 reply; 4+ messages in thread
From: Cong Wang @ 2021-09-20  1:14 UTC (permalink / raw)
  To: Liu Jian
  Cc: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
	David Miller, Jakub Kicinski, Linux Kernel Network Developers,
	bpf

On Fri, Sep 17, 2021 at 3:05 AM Liu Jian <liujian56@huawei.com> wrote:
> @@ -624,6 +635,13 @@ static void sk_psock_backlog(struct work_struct *work)
>         while ((skb = skb_dequeue(&psock->ingress_skb))) {
>                 len = skb->len;
>                 off = 0;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +               if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
> +                       stm = strp_msg(skb);
> +                       off = stm->offset;
> +                       len = stm->full_len;
> +               }
> +#endif

How does this work? You are testing psock->sk->sk_data_ready here
but it is already the dest sock here, so, if we redirect a strp_msg() from
strp socket to non-strp socket, this does not work at all?

And this code looks ugly itself. If you want to distinguish this type of
packet from others, you can add a bit in, for example skb->_sk_redir.

Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress
  2021-09-20  1:14 ` Cong Wang
@ 2021-09-22  2:21   ` liujian (CE)
  0 siblings, 0 replies; 4+ messages in thread
From: liujian (CE) @ 2021-09-22  2:21 UTC (permalink / raw)
  To: Cong Wang
  Cc: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
	David Miller, Jakub Kicinski, Linux Kernel Network Developers,
	bpf



> -----Original Message-----
> From: Cong Wang [mailto:xiyou.wangcong@gmail.com]
> Sent: Monday, September 20, 2021 9:14 AM
> To: liujian (CE) <liujian56@huawei.com>
> Cc: John Fastabend <john.fastabend@gmail.com>; Daniel Borkmann
> <daniel@iogearbox.net>; Jakub Sitnicki <jakub@cloudflare.com>; Lorenz
> Bauer <lmb@cloudflare.com>; David Miller <davem@davemloft.net>; Jakub
> Kicinski <kuba@kernel.org>; Linux Kernel Network Developers
> <netdev@vger.kernel.org>; bpf <bpf@vger.kernel.org>
> Subject: Re: [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress
> 
> On Fri, Sep 17, 2021 at 3:05 AM Liu Jian <liujian56@huawei.com> wrote:
> > @@ -624,6 +635,13 @@ static void sk_psock_backlog(struct work_struct
> *work)
> >         while ((skb = skb_dequeue(&psock->ingress_skb))) {
> >                 len = skb->len;
> >                 off = 0;
> > +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > +               if (psock->sk->sk_data_ready == sk_psock_strp_data_ready) {
> > +                       stm = strp_msg(skb);
> > +                       off = stm->offset;
> > +                       len = stm->full_len;
> > +               }
> > +#endif
> 
> How does this work? You are testing psock->sk->sk_data_ready here but it is
> already the dest sock here, so, if we redirect a strp_msg() from strp socket to
> non-strp socket, this does not work at all?
> 
Yes, it is not work in this case, I did not consider this case.

> And this code looks ugly itself. If you want to distinguish this type of packet
> from others, you can add a bit in, for example skb->_sk_redir.
It looks better this way. 
I will send v3 later. Thank you~
> 
> Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-09-22  2:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17  1:32 [PATCH v2] skmsg: lose offset info in sk_psock_skb_ingress Liu Jian
2021-09-17 22:34 ` John Fastabend
2021-09-20  1:14 ` Cong Wang
2021-09-22  2:21   ` liujian (CE)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).