From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, Cong Wang <cong.wang@bytedance.com>,
Yucong Sun <sunyucong@gmail.com>,
John Fastabend <john.fastabend@gmail.com>,
Daniel Borkmann <daniel@iogearbox.net>,
Jakub Sitnicki <jakub@cloudflare.com>,
Lorenz Bauer <lmb@cloudflare.com>
Subject: [Patch bpf v2 3/4] net: implement ->sock_is_readable for UDP and AF_UNIX
Date: Mon, 27 Sep 2021 17:22:11 -0700 [thread overview]
Message-ID: <20210928002212.14498-4-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20210928002212.14498-1-xiyou.wangcong@gmail.com>
From: Cong Wang <cong.wang@bytedance.com>
Yucong noticed we can't poll() sockets in sockmap even when
they are the destination sockets of redirections. This is
because we never poll any psock queues in ->poll(), except
for TCP. Now we can overwrite >sock_is_readable() and
implement and invoke it for UDP and AF_UNIX sockets.
Reported-by: Yucong Sun <sunyucong@gmail.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
include/linux/skmsg.h | 1 +
net/core/skmsg.c | 14 ++++++++++++++
net/ipv4/udp.c | 2 ++
net/ipv4/udp_bpf.c | 1 +
net/unix/af_unix.c | 4 ++++
net/unix/unix_bpf.c | 2 ++
6 files changed, 24 insertions(+)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 8f577739fc36..a25434207dca 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..93ae48581ad2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+bool sk_msg_is_readable(struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ psock = sk_psock_get_checked(sk);
+ if (IS_ERR_OR_NULL(psock))
+ return false;
+ empty = sk_psock_queue_empty(psock);
+ sk_psock_put(sk, psock);
+ return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8851c9463b4b..9f49c0967504 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2866,6 +2866,8 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a1d5f473878..bbe6569c9ad3 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 92345c9bb60c..f1cbaa0ccf6b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3014,6 +3014,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
@@ -3053,6 +3055,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index b927e2baae50..452376c6f419 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void unix_stream_bpf_rebuild_protos(struct proto *prot,
@@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot,
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
prot->unhash = sock_map_unhash;
}
--
2.30.2
next prev parent reply other threads:[~2021-09-28 0:22 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-28 0:22 [Patch bpf v2 0/4] sock_map: fix ->poll() and update selftests Cong Wang
2021-09-28 0:22 ` [Patch bpf v2 1/4] skmsg: introduce sk_psock_get_checked() Cong Wang
2021-09-28 0:22 ` [Patch bpf v2 2/4] net: rename ->stream_memory_read to ->sock_is_readable Cong Wang
2021-09-28 0:22 ` Cong Wang [this message]
2021-09-30 21:44 ` [Patch bpf v2 3/4] net: implement ->sock_is_readable for UDP and AF_UNIX John Fastabend
2021-10-02 0:00 ` Cong Wang
2021-09-28 0:22 ` [Patch bpf v2 4/4] selftests/bpf: use recv_timeout() instead of retries Cong Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210928002212.14498-4-xiyou.wangcong@gmail.com \
--to=xiyou.wangcong@gmail.com \
--cc=bpf@vger.kernel.org \
--cc=cong.wang@bytedance.com \
--cc=daniel@iogearbox.net \
--cc=jakub@cloudflare.com \
--cc=john.fastabend@gmail.com \
--cc=lmb@cloudflare.com \
--cc=netdev@vger.kernel.org \
--cc=sunyucong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).