* [Patch bpf-next v4 01/12] sock_map: relax config dependency to CONFIG_NET
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 02/12] af_unix: implement ->read_sock() for sockmap Cong Wang
` (10 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Currently sock_map still has hard dependency on CONFIG_INET,
but there is no actual functional dependency on it after
we introduce ->psock_update_sk_prot().
We have to extend it to CONFIG_NET now as we are going to
support AF_UNIX.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
include/linux/bpf.h | 38 ++++++++++++++++++++------------------
init/Kconfig | 2 +-
net/core/Makefile | 2 --
3 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 02b02cb29ce2..b818003739e8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1826,6 +1826,12 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
+
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+void sock_map_unhash(struct sock *sk);
+void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -1851,24 +1857,6 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
-#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
-int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
-int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
-void sock_map_unhash(struct sock *sk);
-void sock_map_close(struct sock *sk, long timeout);
-
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
@@ -1888,7 +1876,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+ void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
void *key, void *value)
{
diff --git a/init/Kconfig b/init/Kconfig
index ca559ccdaa32..a8d9b0e05da1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1723,7 +1723,7 @@ config BPF_SYSCALL
select IRQ_WORK
select TASKS_TRACE_RCU
select BINARY_PRINTF
- select NET_SOCK_MSG if INET
+ select NET_SOCK_MSG if NET
default n
help
Enable the bpf() system call that allows to manipulate eBPF
diff --git a/net/core/Makefile b/net/core/Makefile
index f7f16650fe9e..35ced6201814 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
-ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
-endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 02/12] af_unix: implement ->read_sock() for sockmap
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 01/12] sock_map: relax config dependency to CONFIG_NET Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-18 4:51 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 03/12] af_unix: set TCP_ESTABLISHED for datagram sockets too Cong Wang
` (9 subsequent siblings)
11 siblings, 1 reply; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Implement ->read_sock() for AF_UNIX datagram socket, it is
pretty much similar to udp_read_sock().
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
net/unix/af_unix.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5a31307ceb76..26d18f948737 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,6 +661,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -738,6 +740,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
+ .read_sock = unix_read_sock,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -2183,6 +2186,38 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
return err;
}
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ int copied = 0;
+
+ while (1) {
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int used, err;
+
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ break;
+ } else if (used <= skb->len) {
+ copied += used;
+ }
+
+ if (!desc->count)
+ break;
+ }
+
+ return copied;
+}
+
/*
* Sleep until more data has arrived. But check for races..
*/
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Patch bpf-next v4 02/12] af_unix: implement ->read_sock() for sockmap
2021-05-08 22:08 ` [Patch bpf-next v4 02/12] af_unix: implement ->read_sock() for sockmap Cong Wang
@ 2021-05-18 4:51 ` Cong Wang
0 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-18 4:51 UTC (permalink / raw)
To: Linux Kernel Network Developers
Cc: bpf, Jiang Wang, Xiongchun Duan, Dongdong Wang, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
On Sat, May 8, 2021 at 3:09 PM Cong Wang <xiyou.wangcong@gmail.com> wrote:
> +static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
> + sk_read_actor_t recv_actor)
> +{
> + int copied = 0;
> +
> + while (1) {
> + struct unix_sock *u = unix_sk(sk);
> + struct sk_buff *skb;
> + int used, err;
> +
> + mutex_lock(&u->iolock);
> + skb = skb_recv_datagram(sk, 0, 1, &err);
> + mutex_unlock(&u->iolock);
> + if (!skb)
> + return err;
> +
> + used = recv_actor(desc, skb, 0, skb->len);
> + if (used <= 0) {
> + if (!copied)
> + copied = used;
> + break;
> + } else if (used <= skb->len) {
> + copied += used;
> + }
> +
Like udp_read_sock(), we should also free the skb here.
I will update this and send V5 soon.
Thanks.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 03/12] af_unix: set TCP_ESTABLISHED for datagram sockets too
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 01/12] sock_map: relax config dependency to CONFIG_NET Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 02/12] af_unix: implement ->read_sock() for sockmap Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 04/12] sock_map: export symbols for af_unix module Cong Wang
` (8 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Currently only unix stream socket sets TCP_ESTABLISHED,
datagram socket can set this too when they connect to its
peer socket. At least __ip4_datagram_connect() does the same.
This will be used by a later patch to determine whether an
AF_UNIX datagram socket can be redirected in sockmap.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
net/unix/af_unix.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 26d18f948737..e08918c45892 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -487,6 +487,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
other->sk_error_report(other);
}
}
+ sk->sk_state = other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -1197,6 +1198,9 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
+
+ if (unix_peer(sk))
+ sk->sk_state = other->sk_state = TCP_ESTABLISHED;
return 0;
out_unlock:
@@ -1429,12 +1433,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
init_peercred(ska);
init_peercred(skb);
- if (ska->sk_type != SOCK_DGRAM) {
- ska->sk_state = TCP_ESTABLISHED;
- skb->sk_state = TCP_ESTABLISHED;
- socka->state = SS_CONNECTED;
- sockb->state = SS_CONNECTED;
- }
+ ska->sk_state = TCP_ESTABLISHED;
+ skb->sk_state = TCP_ESTABLISHED;
+ socka->state = SS_CONNECTED;
+ sockb->state = SS_CONNECTED;
return 0;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 04/12] sock_map: export symbols for af_unix module
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (2 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 03/12] af_unix: set TCP_ESTABLISHED for datagram sockets too Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 05/12] af_unix: prepare for sockmap support Cong Wang
` (7 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Before af_unix there is no need to support modules. Now
we have to export these two functions for af_unix module.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
net/core/sock_map.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 6f1b82b8ad49..13b474d89f93 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1513,6 +1513,7 @@ void sock_map_unhash(struct sock *sk)
rcu_read_unlock();
saved_unhash(sk);
}
+EXPORT_SYMBOL_GPL(sock_map_unhash);
void sock_map_close(struct sock *sk, long timeout)
{
@@ -1536,6 +1537,7 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+EXPORT_SYMBOL_GPL(sock_map_close);
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 05/12] af_unix: prepare for sockmap support
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (3 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 04/12] sock_map: export symbols for af_unix module Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 06/12] af_unix: implement ->psock_update_sk_prot() Cong Wang
` (6 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Unlike af_inet, unix_proto is very different, it does not even have
a ->close() or ->unhash(). We have to add some dummy implementations
to satisfy sockmap. Normally they are just nops, they are only used
for the sockmap replacements.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
net/unix/af_unix.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e08918c45892..0f9a6dcca752 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -487,6 +487,8 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
other->sk_error_report(other);
}
}
+ sk->sk_prot->unhash(sk);
+ other->sk_prot->unhash(other);
sk->sk_state = other->sk_state = TCP_CLOSE;
}
@@ -773,10 +775,23 @@ static const struct proto_ops unix_seqpacket_ops = {
.show_fdinfo = unix_show_fdinfo,
};
+/* Nothing to do here, unix socket is not unhashed when disconnecting,
+ * and does not need a ->close(). These are merely for sockmap.
+ */
+static void unix_unhash(struct sock *sk)
+{
+}
+
+static void unix_close(struct sock *sk, long timeout)
+{
+}
+
static struct proto unix_proto = {
.name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
+ .unhash = unix_unhash,
+ .close = unix_close,
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
@@ -860,6 +875,7 @@ static int unix_release(struct socket *sock)
if (!sk)
return 0;
+ sk->sk_prot->close(sk, 0);
unix_release_sock(sk, 0);
sock->sk = NULL;
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 06/12] af_unix: implement ->psock_update_sk_prot()
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (4 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 05/12] af_unix: prepare for sockmap support Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 07/12] af_unix: implement unix_dgram_bpf_recvmsg() Cong Wang
` (5 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Now we can implement unix_bpf_update_proto() to update
sk_prot, particularly prot->close() and prot->unhash().
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
MAINTAINERS | 1 +
include/net/af_unix.h | 10 +++++++++
net/unix/Makefile | 1 +
net/unix/af_unix.c | 6 +++++-
net/unix/unix_bpf.c | 48 +++++++++++++++++++++++++++++++++++++++++++
5 files changed, 65 insertions(+), 1 deletion(-)
create mode 100644 net/unix/unix_bpf.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 4796ccf9f871..f9354cb422b1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10162,6 +10162,7 @@ F: net/core/skmsg.c
F: net/core/sock_map.c
F: net/ipv4/tcp_bpf.c
F: net/ipv4/udp_bpf.c
+F: net/unix/unix_bpf.c
LANTIQ / INTEL Ethernet drivers
M: Hauke Mehrtens <hauke@hauke-m.de>
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index f42fdddecd41..cca645846af1 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -89,4 +89,14 @@ void unix_sysctl_unregister(struct net *net);
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto unix_proto;
+
+int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init unix_bpf_build_proto(void);
+#else
+static inline void __init unix_bpf_build_proto(void)
+{}
+#endif
#endif
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 54e58cc4f945..20491825b4d0 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o
unix-y := af_unix.o garbage.o
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0f9a6dcca752..5424e8007858 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -786,12 +786,15 @@ static void unix_close(struct sock *sk, long timeout)
{
}
-static struct proto unix_proto = {
+struct proto unix_proto = {
.name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.unhash = unix_unhash,
.close = unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_bpf_update_proto,
+#endif
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
@@ -2973,6 +2976,7 @@ static int __init af_unix_init(void)
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
+ unix_bpf_build_proto();
out:
return rc;
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644
index 000000000000..3ff71fb9aac8
--- /dev/null
+++ b/net/unix/unix_bpf.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+static struct proto *unix_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_prot_lock);
+static struct proto unix_bpf_prot;
+
+static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+ *prot = *base;
+ prot->unhash = sock_map_unhash;
+ prot->close = sock_map_close;
+}
+
+static void unix_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
+ spin_lock_bh(&unix_prot_lock);
+ if (likely(ops != unix_prot_saved)) {
+ unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
+ smp_store_release(&unix_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_prot_lock);
+ }
+}
+
+int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
+ return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+ unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
+}
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 07/12] af_unix: implement unix_dgram_bpf_recvmsg()
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (5 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 06/12] af_unix: implement ->psock_update_sk_prot() Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 08/12] sock_map: update sock type checks for AF_UNIX Cong Wang
` (4 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
We have to implement unix_dgram_bpf_recvmsg() to replace the
original ->recvmsg() to retrieve skmsg from ingress_msg.
AF_UNIX is again special here because the lack of
sk_prot->recvmsg(). I simply add a special case inside
unix_dgram_recvmsg() to call sk->sk_prot->recvmsg() directly.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
include/net/af_unix.h | 3 +++
net/unix/af_unix.c | 19 ++++++++++++++---
net/unix/unix_bpf.c | 47 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 3 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index cca645846af1..49b13e9bffd0 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -82,6 +82,9 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int nonblock, int flags);
+
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5424e8007858..e718ca6237a7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2100,11 +2100,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
}
}
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int nonblock, int flags)
{
struct scm_cookie scm;
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
@@ -2207,6 +2207,19 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
return err;
}
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+
+#ifdef CONFIG_BPF_SYSCALL
+ if (sk->sk_prot != &unix_proto)
+ return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
+ return __unix_dgram_recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags);
+}
+
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor)
{
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 3ff71fb9aac8..debe82e1035d 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -6,6 +6,52 @@
#include <net/sock.h>
#include <net/af_unix.h>
+static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags,
+ int *addr_len)
+{
+ struct sk_psock *psock;
+ int copied, ret;
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return __unix_dgram_recvmsg(sk, msg, len, nonblock, flags);
+
+ lock_sock(sk);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock)) {
+ ret = __unix_dgram_recvmsg(sk, msg, len, nonblock, flags);
+ goto out;
+ }
+
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ int data, err = 0;
+ long timeo;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ ret = __unix_dgram_recvmsg(sk, msg, len, nonblock,
+ flags);
+ goto out;
+ }
+ if (err) {
+ ret = err;
+ goto out;
+ }
+ copied = -EAGAIN;
+ }
+ ret = copied;
+out:
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return ret;
+}
+
static struct proto *unix_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_prot_lock);
static struct proto unix_bpf_prot;
@@ -15,6 +61,7 @@ static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base
*prot = *base;
prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
+ prot->recvmsg = unix_dgram_bpf_recvmsg;
}
static void unix_bpf_check_needs_rebuild(struct proto *ops)
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 08/12] sock_map: update sock type checks for AF_UNIX
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (6 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 07/12] af_unix: implement unix_dgram_bpf_recvmsg() Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 09/12] selftests/bpf: factor out udp_socketpair() Cong Wang
` (3 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Now AF_UNIX datagram supports sockmap and redirection,
we can update the sock type checks for them accordingly.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
net/core/sock_map.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 13b474d89f93..ee9a6dfcf269 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -533,6 +533,12 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+static bool sk_is_unix(const struct sock *sk)
+{
+ return sk->sk_type == SOCK_DGRAM &&
+ sk->sk_family == AF_UNIX;
+}
+
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
@@ -552,6 +558,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
else if (sk_is_udp(sk))
return sk_hashed(sk);
+ else if (sk_is_unix(sk))
+ return sk->sk_state == TCP_ESTABLISHED;
return false;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 09/12] selftests/bpf: factor out udp_socketpair()
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (7 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 08/12] sock_map: update sock type checks for AF_UNIX Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 10/12] selftests/bpf: factor out add_to_sockmap() Cong Wang
` (2 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Factor out a common helper udp_socketpair() which creates
a pair of connected UDP sockets.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 76 ++++++++++---------
1 file changed, 39 insertions(+), 37 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..3d9907bcf132 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1603,32 +1603,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
+static int udp_socketpair(int family, int *s, int *c)
{
- const char *log_prefix = redir_mode_str(mode);
struct sockaddr_storage addr;
- int c0, c1, p0, p1;
- unsigned int pass;
socklen_t len;
- int err, n;
- u64 value;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int p0, c0;
+ int err;
- p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ p0 = socket_loopback(family, SOCK_DGRAM | SOCK_NONBLOCK);
if (p0 < 0)
- return;
+ return p0;
+
len = sizeof(addr);
err = xgetsockname(p0, sockaddr(&addr), &len);
if (err)
goto close_peer0;
- c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c0 < 0)
+ c0 = xsocket(family, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (c0 < 0) {
+ err = c0;
goto close_peer0;
+ }
err = xconnect(c0, sockaddr(&addr), len);
if (err)
goto close_cli0;
@@ -1639,25 +1634,36 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli0;
- p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (p1 < 0)
- goto close_cli0;
- err = xgetsockname(p1, sockaddr(&addr), &len);
- if (err)
- goto close_cli0;
+ *s = p0;
+ *c = c0;
+ return 0;
- c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c1 < 0)
- goto close_peer1;
- err = xconnect(c1, sockaddr(&addr), len);
- if (err)
- goto close_cli1;
- err = xgetsockname(c1, sockaddr(&addr), &len);
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+ return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = udp_socketpair(family, &p0, &c0);
if (err)
- goto close_cli1;
- err = xconnect(p1, sockaddr(&addr), len);
+ return;
+ err = udp_socketpair(family, &p1, &c1);
if (err)
- goto close_cli1;
+ goto close_cli0;
key = 0;
value = p0;
@@ -1694,11 +1700,9 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
close_cli1:
xclose(c1);
-close_peer1:
xclose(p1);
close_cli0:
xclose(c0);
-close_peer0:
xclose(p0);
}
@@ -1715,11 +1719,9 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
skel->bss->test_ingress = true;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 10/12] selftests/bpf: factor out add_to_sockmap()
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (8 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 09/12] selftests/bpf: factor out udp_socketpair() Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 11/12] selftests/bpf: add a test case for unix sockmap Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 12/12] selftests/bpf: add test cases for redirection between udp and unix Cong Wang
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Factor out a common helper add_to_sockmap() which adds two
sockets into a sockmap.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 59 +++++++------------
1 file changed, 21 insertions(+), 38 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 3d9907bcf132..ee017278fae4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -919,6 +919,23 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
+static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -928,7 +945,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
unsigned int pass;
socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
@@ -965,15 +981,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (p1 < 0)
goto close_cli1;
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_peer1;
@@ -1061,7 +1069,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +1093,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1346,7 +1345,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1358,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1652,7 +1644,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
int c0, c1, p0, p1;
unsigned int pass;
int err, n;
- u64 value;
u32 key;
char b;
@@ -1665,15 +1656,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
if (err)
goto close_cli0;
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_cli1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 11/12] selftests/bpf: add a test case for unix sockmap
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (9 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 10/12] selftests/bpf: factor out add_to_sockmap() Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
2021-05-08 22:08 ` [Patch bpf-next v4 12/12] selftests/bpf: add test cases for redirection between udp and unix Cong Wang
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Add a test case to ensure redirection between two AF_UNIX
datagram sockets work.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 92 +++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index ee017278fae4..2b1bdb8fa48d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1433,6 +1433,8 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
default:
return "unknown";
}
@@ -1555,6 +1557,94 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
+ xclose(p1);
+close0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int sotype)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_UNIX);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ unix_skb_redir_to_connected(skel, map, sotype);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1747,10 +1837,12 @@ void test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
test_sockmap_listen__destroy(skel);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Patch bpf-next v4 12/12] selftests/bpf: add test cases for redirection between udp and unix
2021-05-08 22:08 [Patch bpf-next v4 00/12] sockmap: add sockmap support for unix datagram socket Cong Wang
` (10 preceding siblings ...)
2021-05-08 22:08 ` [Patch bpf-next v4 11/12] selftests/bpf: add a test case for unix sockmap Cong Wang
@ 2021-05-08 22:08 ` Cong Wang
11 siblings, 0 replies; 14+ messages in thread
From: Cong Wang @ 2021-05-08 22:08 UTC (permalink / raw)
To: netdev
Cc: bpf, jiang.wang, duanxiongchun, wangdongdong.6, Cong Wang,
John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer
From: Cong Wang <cong.wang@bytedance.com>
Add two test cases to ensure redirection between udp and unix
work bidirectionally.
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 165 ++++++++++++++++++
1 file changed, 165 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 2b1bdb8fa48d..01c052e15a83 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1813,6 +1813,170 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
udp_skb_redir_to_connected(skel, map, family);
}
+static void udp_unix_redir_to_connected(int family, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ err = udp_socketpair(family, &p1, &c1);
+ if (err)
+ goto close;
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close_cli1;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_cli1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+ xclose(p1);
+close:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_udp_redir_to_connected(int family, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = udp_socketpair(family, &p0, &c0);
+ if (err)
+ return;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ goto close_cli0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+
+}
+
+static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_unix_skb_redir_to_connected(skel, map, family);
+ unix_udp_skb_redir_to_connected(skel, map, family);
+}
+
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1822,6 +1986,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
test_udp_redir(skel, map, family);
+ test_udp_unix_redir(skel, map, family);
}
void test_sockmap_listen(void)
--
2.25.1
^ permalink raw reply related [flat|nested] 14+ messages in thread