bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress
@ 2021-09-22  9:32 Liu Jian
  2021-09-28  5:52 ` Cong Wang
  0 siblings, 1 reply; 3+ messages in thread
From: Liu Jian @ 2021-09-22  9:32 UTC (permalink / raw)
  To: john.fastabend, daniel, jakub, lmb, davem, kuba, ast, andrii,
	kafai, songliubraving, yhs, kpsingh, netdev, bpf, xiyou.wangcong
  Cc: liujian56

If sockmap enable strparser, there are lose offset info in
sk_psock_skb_ingress. If the length determined by parse_msg function
is not skb->len, the skb will be converted to sk_msg multiple times,
and userspace app will get the data multiple times.

Fix this by get the offset and length from strp_msg.
And as Cong suggestion, add one bit in skb->_sk_redir to distinguish
enable or disable strparser.

Signed-off-by: Liu Jian <liujian56@huawei.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>

---
v1->v2: fix build error when disable CONFIG_BPF_STREAM_PARSER
v2->v3: Add one bit in skb->_sk_redir to distinguish enable or disable strparser

 include/linux/skmsg.h | 18 ++++++++++++++--
 net/core/skmsg.c      | 48 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..74eb0e97b4bd 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
-/* We only have one bit so far. */
-#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+#define BPF_F_STRPARSER	(1UL << 1)
+
+/* We only have two bit so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER)
+
+static inline bool skb_bpf_strparser(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return sk_redir & BPF_F_STRPARSER;
+}
+
+static inline void skb_bpf_set_strparser(struct sk_buff *skb)
+{
+	skb->_sk_redir |= BPF_F_STRPARSER;
+}
 
 static inline bool skb_bpf_ingress(const struct sk_buff *skb)
 {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..2faf2fa110f2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					u32 off, u32 len,
 					struct sk_psock *psock,
 					struct sock *sk,
 					struct sk_msg *msg)
@@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	 */
 	if (skb_linearize(skb))
 		return -EAGAIN;
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
 	if (unlikely(num_sge < 0))
 		return num_sge;
 
-	copied = skb->len;
+	copied = len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
 	msg->sg.end = num_sge;
@@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+				u32 off, u32 len)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
@@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * correctly.
 	 */
 	if (unlikely(skb->sk == sk))
-		return sk_psock_skb_ingress_self(psock, skb);
+		return sk_psock_skb_ingress_self(psock, skb, off, len);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;
@@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * into user buffers.
 	 */
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
  * skb. In this case we do not need to check memory limits or skb_set_owner_r
  * because the skb is already accounted for here.
  */
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len)
 {
 	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	struct sock *sk = psock->sk;
@@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 		return -EAGAIN;
 	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			return -EAGAIN;
 		return skb_send_sock(psock->sk, skb, off, len);
 	}
-	return sk_psock_skb_ingress(psock, skb);
+	return sk_psock_skb_ingress(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -604,6 +608,9 @@ static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
 	struct sk_buff *skb = NULL;
 	bool ingress;
 	u32 len, off;
@@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct *work)
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+		if (skb_bpf_strparser(skb)) {
+			stm = strp_msg(skb);
+			off = stm->offset;
+			len = stm->full_len;
+		}
+#endif
 start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
@@ -930,6 +944,10 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 {
 	struct sock *sk_other;
 	int err = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strp_msg *stm = NULL;
+#endif
+	u32 len, off;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -949,7 +967,16 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress_self(psock, skb);
+			len = skb->len;
+			off = 0;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+			if (skb_bpf_strparser(skb)) {
+				stm = strp_msg(skb);
+				off = stm->offset;
+				len = stm->full_len;
+			}
+#endif
+			err = sk_psock_skb_ingress_self(psock, skb, off, len);
 		}
 		if (err < 0) {
 			spin_lock_bh(&psock->ingress_lock);
@@ -1018,6 +1045,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
+	skb_bpf_set_strparser(skb);
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
 	rcu_read_unlock();
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress
  2021-09-22  9:32 [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress Liu Jian
@ 2021-09-28  5:52 ` Cong Wang
  2021-09-29  2:08   ` liujian (CE)
  0 siblings, 1 reply; 3+ messages in thread
From: Cong Wang @ 2021-09-28  5:52 UTC (permalink / raw)
  To: Liu Jian
  Cc: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
	David Miller, Jakub Kicinski, Alexei Starovoitov,
	Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, Linux Kernel Network Developers, bpf

On Wed, Sep 22, 2021 at 2:32 AM Liu Jian <liujian56@huawei.com> wrote:
>  static void sk_psock_skb_state(struct sk_psock *psock,
> @@ -604,6 +608,9 @@ static void sk_psock_backlog(struct work_struct *work)
>  {
>         struct sk_psock *psock = container_of(work, struct sk_psock, work);
>         struct sk_psock_work_state *state = &psock->work_state;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +       struct strp_msg *stm = NULL;
> +#endif
>         struct sk_buff *skb = NULL;
>         bool ingress;
>         u32 len, off;
> @@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct *work)
>         while ((skb = skb_dequeue(&psock->ingress_skb))) {
>                 len = skb->len;
>                 off = 0;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +               if (skb_bpf_strparser(skb)) {

If CONFIG_BPF_STREAM_PARSER is disabled, this
should always return false, hence you don't need this #ifdef.
Or alternatively, you can at least define for nop for
skb_bpf_strparser() if !CONFIG_BPF_STREAM_PARSER.
And you can move the above "stm" down here too.

(Ditto for the other place below.)

Thanks.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress
  2021-09-28  5:52 ` Cong Wang
@ 2021-09-29  2:08   ` liujian (CE)
  0 siblings, 0 replies; 3+ messages in thread
From: liujian (CE) @ 2021-09-29  2:08 UTC (permalink / raw)
  To: Cong Wang
  Cc: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
	David Miller, Jakub Kicinski, Alexei Starovoitov,
	Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, Linux Kernel Network Developers, bpf



> -----Original Message-----
> From: Cong Wang [mailto:xiyou.wangcong@gmail.com]
> Sent: Tuesday, September 28, 2021 1:52 PM
> To: liujian (CE) <liujian56@huawei.com>
> Cc: John Fastabend <john.fastabend@gmail.com>; Daniel Borkmann
> <daniel@iogearbox.net>; Jakub Sitnicki <jakub@cloudflare.com>; Lorenz
> Bauer <lmb@cloudflare.com>; David Miller <davem@davemloft.net>; Jakub
> Kicinski <kuba@kernel.org>; Alexei Starovoitov <ast@kernel.org>; Andrii
> Nakryiko <andrii@kernel.org>; Martin KaFai Lau <kafai@fb.com>; Song Liu
> <songliubraving@fb.com>; Yonghong Song <yhs@fb.com>; KP Singh
> <kpsingh@kernel.org>; Linux Kernel Network Developers
> <netdev@vger.kernel.org>; bpf <bpf@vger.kernel.org>
> Subject: Re: [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress
> 
> On Wed, Sep 22, 2021 at 2:32 AM Liu Jian <liujian56@huawei.com> wrote:
> >  static void sk_psock_skb_state(struct sk_psock *psock, @@ -604,6
> > +608,9 @@ static void sk_psock_backlog(struct work_struct *work)  {
> >         struct sk_psock *psock = container_of(work, struct sk_psock, work);
> >         struct sk_psock_work_state *state = &psock->work_state;
> > +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > +       struct strp_msg *stm = NULL;
> > +#endif
> >         struct sk_buff *skb = NULL;
> >         bool ingress;
> >         u32 len, off;
> > @@ -624,6 +631,13 @@ static void sk_psock_backlog(struct work_struct
> *work)
> >         while ((skb = skb_dequeue(&psock->ingress_skb))) {
> >                 len = skb->len;
> >                 off = 0;
> > +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > +               if (skb_bpf_strparser(skb)) {
> 
> If CONFIG_BPF_STREAM_PARSER is disabled, this should always return false,
> hence you don't need this #ifdef.
> Or alternatively, you can at least define for nop for
> skb_bpf_strparser() if !CONFIG_BPF_STREAM_PARSER.
> And you can move the above "stm" down here too.
> 
V4 has been sent, thank you~

> (Ditto for the other place below.)
> 
> Thanks.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-09-29  2:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-22  9:32 [PATCH v3] skmsg: lose offset info in sk_psock_skb_ingress Liu Jian
2021-09-28  5:52 ` Cong Wang
2021-09-29  2:08   ` liujian (CE)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).