From: Eric Dumazet <eric.dumazet@gmail.com>
To: Cong Wang <xiyou.wangcong@gmail.com>, netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, Cong Wang <cong.wang@bytedance.com>,
John Fastabend <john.fastabend@gmail.com>,
Daniel Borkmann <daniel@iogearbox.net>,
Jakub Sitnicki <jakub@cloudflare.com>,
Lorenz Bauer <lmb@cloudflare.com>
Subject: Re: [PATCH bpf-next v5 07/11] af_unix: implement unix_dgram_bpf_recvmsg()
Date: Sun, 18 Jul 2021 19:49:34 +0200 [thread overview]
Message-ID: <a76f89b3-0911-e1f1-d1c1-707b9bc5478a@gmail.com> (raw)
In-Reply-To: <20210704190252.11866-8-xiyou.wangcong@gmail.com>
On 7/4/21 9:02 PM, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
>
> We have to implement unix_dgram_bpf_recvmsg() to replace the
> original ->recvmsg() to retrieve skmsg from ingress_msg.
>
> AF_UNIX is again special here because of the lack of
> sk_prot->recvmsg(). I simply add a special case inside
> unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
> include/net/af_unix.h | 2 ++
> net/unix/af_unix.c | 19 +++++++++--
> net/unix/unix_bpf.c | 75 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index cca645846af1..435a2c3d5a6f 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -82,6 +82,8 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
> long unix_inq_len(struct sock *sk);
> long unix_outq_len(struct sock *sk);
>
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> + int flags);
> #ifdef CONFIG_SYSCTL
> int unix_sysctl_register(struct net *net);
> void unix_sysctl_unregister(struct net *net);
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 573253c5b5c2..89927678c0dc 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -2098,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
> }
> }
>
> -static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> - size_t size, int flags)
> +int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
> + int flags)
> {
> struct scm_cookie scm;
> - struct sock *sk = sock->sk;
> + struct socket *sock = sk->sk_socket;
> struct unix_sock *u = unix_sk(sk);
> struct sk_buff *skb, *last;
> long timeo;
> @@ -2205,6 +2205,19 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
> return err;
> }
>
> +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
> + int flags)
> +{
> + struct sock *sk = sock->sk;
> +
> +#ifdef CONFIG_BPF_SYSCALL
> + if (sk->sk_prot != &unix_proto)
> + return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
> + flags & ~MSG_DONTWAIT, NULL);
> +#endif
> + return __unix_dgram_recvmsg(sk, msg, size, flags);
> +}
> +
> static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> sk_read_actor_t recv_actor)
> {
> diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
> index b1582a659427..db0cda29fb2f 100644
> --- a/net/unix/unix_bpf.c
> +++ b/net/unix/unix_bpf.c
> @@ -6,6 +6,80 @@
> #include <net/sock.h>
> #include <net/af_unix.h>
>
> +#define unix_sk_has_data(__sk, __psock) \
> + ({ !skb_queue_empty(&__sk->sk_receive_queue) || \
> + !skb_queue_empty(&__psock->ingress_skb) || \
> + !list_empty(&__psock->ingress_msg); \
> + })
> +
> +static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
> + long timeo)
> +{
> + DEFINE_WAIT_FUNC(wait, woken_wake_function);
> + struct unix_sock *u = unix_sk(sk);
> + int ret = 0;
> +
> + if (sk->sk_shutdown & RCV_SHUTDOWN)
> + return 1;
> +
> + if (!timeo)
> + return ret;
> +
> + add_wait_queue(sk_sleep(sk), &wait);
> + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> + if (!unix_sk_has_data(sk, psock)) {
> + mutex_unlock(&u->iolock);
> + wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
> + mutex_lock(&u->iolock);
> + ret = unix_sk_has_data(sk, psock);
> + }
> + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> + remove_wait_queue(sk_sleep(sk), &wait);
> + return ret;
> +}
> +
> +static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
> + size_t len, int nonblock, int flags,
> + int *addr_len)
> +{
> + struct unix_sock *u = unix_sk(sk);
> + struct sk_psock *psock;
> + int copied, ret;
> +
> + psock = sk_psock_get(sk);
> + if (unlikely(!psock))
> + return __unix_dgram_recvmsg(sk, msg, len, flags);
> +
> + mutex_lock(&u->iolock);
u->iolock mutex is owned here.
> + if (!skb_queue_empty(&sk->sk_receive_queue) &&
> + sk_psock_queue_empty(psock)) {
> + ret = __unix_dgram_recvmsg(sk, msg, len, flags);
But __unix_dgram_recvmsg() will also try to grab this mutex?
> + goto out;
> + }
> +
> +msg_bytes_ready:
> + copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
> + if (!copied) {
> + long timeo;
> + int data;
> +
> + timeo = sock_rcvtimeo(sk, nonblock);
> + data = unix_msg_wait_data(sk, psock, timeo);
> + if (data) {
> + if (!sk_psock_queue_empty(psock))
> + goto msg_bytes_ready;
> + ret = __unix_dgram_recvmsg(sk, msg, len, flags);
> + goto out;
> + }
> + copied = -EAGAIN;
> + }
> + ret = copied;
> +out:
> + mutex_unlock(&u->iolock);
> + sk_psock_put(sk, psock);
> + return ret;
> +}
> +
> static struct proto *unix_prot_saved __read_mostly;
> static DEFINE_SPINLOCK(unix_prot_lock);
> static struct proto unix_bpf_prot;
> @@ -14,6 +88,7 @@ static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base
> {
> *prot = *base;
> prot->close = sock_map_close;
> + prot->recvmsg = unix_dgram_bpf_recvmsg;
> }
>
> static void unix_bpf_check_needs_rebuild(struct proto *ops)
>
next prev parent reply other threads:[~2021-07-18 17:49 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-04 19:02 [PATCH bpf-next v5 00/11] sockmap: add sockmap support for unix datagram socket Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 01/11] sock_map: relax config dependency to CONFIG_NET Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 02/11] sock_map: lift socket state restriction for datagram sockets Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 03/11] af_unix: implement ->read_sock() for sockmap Cong Wang
2021-07-12 17:04 ` John Fastabend
2021-07-04 19:02 ` [PATCH bpf-next v5 04/11] af_unix: set TCP_ESTABLISHED for datagram sockets too Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 05/11] af_unix: add a dummy ->close() for sockmap Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 06/11] af_unix: implement ->psock_update_sk_prot() Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 07/11] af_unix: implement unix_dgram_bpf_recvmsg() Cong Wang
2021-07-18 17:49 ` Eric Dumazet [this message]
2021-07-20 0:03 ` Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 08/11] selftests/bpf: factor out udp_socketpair() Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 09/11] selftests/bpf: factor out add_to_sockmap() Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 10/11] selftests/bpf: add a test case for unix sockmap Cong Wang
2021-07-04 19:02 ` [PATCH bpf-next v5 11/11] selftests/bpf: add test cases for redirection between udp and unix Cong Wang
2021-08-05 22:43 ` Andrii Nakryiko
2021-08-06 2:34 ` Cong Wang
2021-07-12 17:02 ` [PATCH bpf-next v5 00/11] sockmap: add sockmap support for unix datagram socket John Fastabend
2021-07-16 1:31 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a76f89b3-0911-e1f1-d1c1-707b9bc5478a@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=bpf@vger.kernel.org \
--cc=cong.wang@bytedance.com \
--cc=daniel@iogearbox.net \
--cc=jakub@cloudflare.com \
--cc=john.fastabend@gmail.com \
--cc=lmb@cloudflare.com \
--cc=netdev@vger.kernel.org \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).