All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, duanxiongchun@bytedance.com,
	wangdongdong.6@bytedance.com, jiang.wang@bytedance.com,
	Cong Wang <cong.wang@bytedance.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Lorenz Bauer <lmb@cloudflare.com>,
	Jakub Sitnicki <jakub@cloudflare.com>,
	John Fastabend <john.fastabend@gmail.com>
Subject: [Patch bpf-next v8 02/16] skmsg: introduce a spinlock to protect ingress_msg
Date: Tue, 30 Mar 2021 19:32:23 -0700	[thread overview]
Message-ID: <20210331023237.41094-3-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20210331023237.41094-1-xiyou.wangcong@gmail.com>

From: Cong Wang <cong.wang@bytedance.com>

Currently we rely on lock_sock to protect ingress_msg,
it is too big for this, we can actually just use a spinlock
to protect this list like protecting other skb queues.

__tcp_bpf_recvmsg() is still special because of peeking,
it still has to use lock_sock.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/skmsg.h | 46 +++++++++++++++++++++++++++++++++++++++++++
 net/core/skmsg.c      |  3 +++
 net/ipv4/tcp_bpf.c    | 18 ++++++-----------
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 6c09d94be2e9..f2d45a73b2b2 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -89,6 +89,7 @@ struct sk_psock {
 #endif
 	struct sk_buff_head		ingress_skb;
 	struct list_head		ingress_msg;
+	spinlock_t			ingress_lock;
 	unsigned long			state;
 	struct list_head		link;
 	spinlock_t			link_lock;
@@ -284,7 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
+	spin_lock_bh(&psock->ingress_lock);
 	list_add_tail(&msg->list, &psock->ingress_msg);
+	spin_unlock_bh(&psock->ingress_lock);
+}
+
+static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	if (msg)
+		list_del(&msg->list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
+					       struct sk_msg *msg)
+{
+	struct sk_msg *ret;
+
+	spin_lock_bh(&psock->ingress_lock);
+	if (list_is_last(&msg->list, &psock->ingress_msg))
+		ret = NULL;
+	else
+		ret = list_next_entry(msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return ret;
 }
 
 static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@ -292,6 +331,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
 	return psock ? list_empty(&psock->ingress_msg) : true;
 }
 
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+	if (msg->skb)
+		consume_skb(msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 {
 	struct sock *sk = psock->sk;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bebf84ed4e30..305dddc51857 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -592,6 +592,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 
 	INIT_WORK(&psock->work, sk_psock_backlog);
 	INIT_LIST_HEAD(&psock->ingress_msg);
+	spin_lock_init(&psock->ingress_lock);
 	skb_queue_head_init(&psock->ingress_skb);
 
 	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
@@ -638,7 +639,9 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 	}
+	spin_lock_bh(&psock->ingress_lock);
 	__sk_psock_purge_ingress_msg(psock);
+	spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_link_destroy(struct sk_psock *psock)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 17c322b875fd..ae980716d896 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -18,9 +18,7 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 	struct sk_msg *msg_rx;
 	int i, copied = 0;
 
-	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-					  struct sk_msg, list);
-
+	msg_rx = sk_psock_peek_msg(psock);
 	while (copied != len) {
 		struct scatterlist *sge;
 
@@ -68,22 +66,18 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		} while (i != msg_rx->sg.end);
 
 		if (unlikely(peek)) {
-			if (msg_rx == list_last_entry(&psock->ingress_msg,
-						      struct sk_msg, list))
+			msg_rx = sk_psock_next_msg(psock, msg_rx);
+			if (!msg_rx)
 				break;
-			msg_rx = list_next_entry(msg_rx, list);
 			continue;
 		}
 
 		msg_rx->sg.start = i;
 		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
-			list_del(&msg_rx->list);
-			if (msg_rx->skb)
-				consume_skb(msg_rx->skb);
-			kfree(msg_rx);
+			msg_rx = sk_psock_dequeue_msg(psock);
+			kfree_sk_msg(msg_rx);
 		}
-		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-						  struct sk_msg, list);
+		msg_rx = sk_psock_peek_msg(psock);
 	}
 
 	return copied;
-- 
2.25.1


  parent reply	other threads:[~2021-03-31  2:33 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-31  2:32 [Patch bpf-next v8 00/16] sockmap: introduce BPF_SK_SKB_VERDICT and support UDP Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 01/16] skmsg: lock ingress_skb when purging Cong Wang
2021-03-31 22:00   ` John Fastabend
2021-03-31  2:32 ` Cong Wang [this message]
2021-03-31  2:32 ` [Patch bpf-next v8 03/16] net: introduce skb_send_sock() for sock_map Cong Wang
2021-04-01  8:10   ` Jakub Sitnicki
2021-03-31  2:32 ` [Patch bpf-next v8 04/16] skmsg: avoid lock_sock() in sk_psock_backlog() Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 05/16] skmsg: use rcu work for destroying psock Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 06/16] skmsg: use GFP_KERNEL in sk_psock_create_ingress_msg() Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 07/16] sock_map: simplify sock_map_link() a bit Cong Wang
2021-04-01  5:48   ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 08/16] sock_map: kill sock_map_link_no_progs() Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 09/16] sock_map: introduce BPF_SK_SKB_VERDICT Cong Wang
2021-04-01  5:51   ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 10/16] sock: introduce sk->sk_prot->psock_update_sk_prot() Cong Wang
2021-04-02 10:16   ` Jakub Sitnicki
2021-04-03  5:13     ` Cong Wang
2021-04-05  8:25   ` Eric Dumazet
2021-04-06 18:12     ` John Fastabend
2021-04-06 18:30     ` Cong Wang
2021-04-06 21:07       ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 11/16] udp: implement ->read_sock() for sockmap Cong Wang
2021-04-01  6:00   ` John Fastabend
2021-04-03  5:08     ` Cong Wang
2021-04-03  6:45       ` Alexei Starovoitov
2021-03-31  2:32 ` [Patch bpf-next v8 12/16] skmsg: extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data() Cong Wang
2021-04-01 16:36   ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 13/16] udp: implement udp_bpf_recvmsg() for sockmap Cong Wang
2021-04-01 16:24   ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 14/16] sock_map: update sock type checks for UDP Cong Wang
2021-04-01  6:02   ` John Fastabend
2021-03-31  2:32 ` [Patch bpf-next v8 15/16] selftests/bpf: add a test case for udp sockmap Cong Wang
2021-03-31  2:32 ` [Patch bpf-next v8 16/16] selftests/bpf: add a test case for loading BPF_SK_SKB_VERDICT Cong Wang
2021-04-01 16:51 ` [Patch bpf-next v8 00/16] sockmap: introduce BPF_SK_SKB_VERDICT and support UDP John Fastabend
2021-04-01 18:03   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210331023237.41094-3-xiyou.wangcong@gmail.com \
    --to=xiyou.wangcong@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=cong.wang@bytedance.com \
    --cc=daniel@iogearbox.net \
    --cc=duanxiongchun@bytedance.com \
    --cc=jakub@cloudflare.com \
    --cc=jiang.wang@bytedance.com \
    --cc=john.fastabend@gmail.com \
    --cc=lmb@cloudflare.com \
    --cc=netdev@vger.kernel.org \
    --cc=wangdongdong.6@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.