netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 0/3] AF_XDP: add socket monitoring support
@ 2019-01-18 13:03 bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 1/3] net: xsk: track AF_XDP sockets on a per-netns list bjorn.topel
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: bjorn.topel @ 2019-01-18 13:03 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson

From: Björn Töpel <bjorn.topel@intel.com>

This series adds an AF_XDP sock_diag interface for querying sockets
from user-space. Tools like iproute2 ss(8) can use this interface to
list open AF_XDP sockets.

The diagnostic provides information about the Rx/Tx/fill/completetion
rings, umem, memory usage and such. For a complete list, please refer
to the xsk_diag.c file.

The AF_XDP sock_diag interface is optional, and can be built as a
module.

A separate patch series, adding ss(8) iproute2 support, will follow.


Thanks,
Björn


Björn Töpel (3):
  net: xsk: track AF_XDP sockets on a per-netns list
  xsk: add id to umem
  xsk: add sock_diag interface for AF_XDP

 include/net/net_namespace.h   |   4 +
 include/net/netns/xdp.h       |  13 +++
 include/net/xdp_sock.h        |   1 +
 include/uapi/linux/xdp_diag.h |  72 +++++++++++++
 net/xdp/Kconfig               |   8 ++
 net/xdp/Makefile              |   1 +
 net/xdp/xdp_umem.c            |  13 +++
 net/xdp/xsk.c                 |  36 ++++++-
 net/xdp/xsk.h                 |  12 +++
 net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
 10 files changed, 347 insertions(+), 5 deletions(-)
 create mode 100644 include/net/netns/xdp.h
 create mode 100644 include/uapi/linux/xdp_diag.h
 create mode 100644 net/xdp/xsk.h
 create mode 100644 net/xdp/xsk_diag.c

-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 1/3] net: xsk: track AF_XDP sockets on a per-netns list
  2019-01-18 13:03 [PATCH bpf-next 0/3] AF_XDP: add socket monitoring support bjorn.topel
@ 2019-01-18 13:03 ` bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 2/3] xsk: add id to umem bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP bjorn.topel
  2 siblings, 0 replies; 8+ messages in thread
From: bjorn.topel @ 2019-01-18 13:03 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson

From: Björn Töpel <bjorn.topel@intel.com>

Track each AF_XDP socket in a per-netns list. This will be used later
by the sock_diag interface for querying sockets from userspace.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/net/net_namespace.h |  4 ++++
 include/net/netns/xdp.h     | 13 +++++++++++++
 net/xdp/xsk.c               | 30 ++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 include/net/netns/xdp.h

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 99d4148e0f90..a68ced28d8f4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -31,6 +31,7 @@
 #include <net/netns/xfrm.h>
 #include <net/netns/mpls.h>
 #include <net/netns/can.h>
+#include <net/netns/xdp.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -160,6 +161,9 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_CAN)
 	struct netns_can	can;
+#endif
+#ifdef CONFIG_XDP_SOCKETS
+	struct netns_xdp	xdp;
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h
new file mode 100644
index 000000000000..e5734261ba0a
--- /dev/null
+++ b/include/net/netns/xdp.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETNS_XDP_H__
+#define __NETNS_XDP_H__
+
+#include <linux/rculist.h>
+#include <linux/mutex.h>
+
+struct netns_xdp {
+	struct mutex		lock;
+	struct hlist_head	list;
+};
+
+#endif /* __NETNS_XDP_H__ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a03268454a27..80ca48cefc42 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -350,6 +350,10 @@ static int xsk_release(struct socket *sock)
 
 	net = sock_net(sk);
 
+	mutex_lock(&net->xdp.lock);
+	sk_del_node_init_rcu(sk);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	local_bh_enable();
@@ -746,6 +750,10 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	mutex_init(&xs->mutex);
 	spin_lock_init(&xs->tx_completion_lock);
 
+	mutex_lock(&net->xdp.lock);
+	sk_add_node_rcu(sk, &net->xdp.list);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
 	local_bh_enable();
@@ -759,6 +767,23 @@ static const struct net_proto_family xsk_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
+static int __net_init xsk_net_init(struct net *net)
+{
+	mutex_init(&net->xdp.lock);
+	INIT_HLIST_HEAD(&net->xdp.list);
+	return 0;
+}
+
+static void __net_exit xsk_net_exit(struct net *net)
+{
+	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
+}
+
+static struct pernet_operations xsk_net_ops = {
+	.init = xsk_net_init,
+	.exit = xsk_net_exit,
+};
+
 static int __init xsk_init(void)
 {
 	int err;
@@ -771,8 +796,13 @@ static int __init xsk_init(void)
 	if (err)
 		goto out_proto;
 
+	err = register_pernet_subsys(&xsk_net_ops);
+	if (err)
+		goto out_sk;
 	return 0;
 
+out_sk:
+	sock_unregister(PF_XDP);
 out_proto:
 	proto_unregister(&xsk_proto);
 out:
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 2/3] xsk: add id to umem
  2019-01-18 13:03 [PATCH bpf-next 0/3] AF_XDP: add socket monitoring support bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 1/3] net: xsk: track AF_XDP sockets on a per-netns list bjorn.topel
@ 2019-01-18 13:03 ` bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP bjorn.topel
  2 siblings, 0 replies; 8+ messages in thread
From: bjorn.topel @ 2019-01-18 13:03 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson

From: Björn Töpel <bjorn.topel@intel.com>

This commit adds an id to the umem structure. The id uniquely
identifies a umem instance, and will be exposed to user-space via the
socket monitoring interface.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/net/xdp_sock.h |  1 +
 net/xdp/xdp_umem.c     | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 13acb9803a6d..61cf7dbb6782 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -42,6 +42,7 @@ struct xdp_umem {
 	struct work_struct work;
 	struct page **pgs;
 	u32 npgs;
+	int id;
 	struct net_device *dev;
 	struct xdp_umem_fq_reuse *fq_reuse;
 	u16 queue_id;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index a264cf2accd0..eabdb0f59031 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -13,12 +13,15 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/idr.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+static DEFINE_IDA(umem_ida);
+
 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
 	unsigned long flags;
@@ -183,6 +186,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
 
 	xdp_umem_clear_dev(umem);
 
+	ida_simple_remove(&umem_ida, umem->id);
+
 	if (umem->fq) {
 		xskq_destroy(umem->fq);
 		umem->fq = NULL;
@@ -389,8 +394,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
+	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+	if (err < 0) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+	umem->id = err;
+
 	err = xdp_umem_reg(umem, mr);
 	if (err) {
+		ida_simple_remove(&umem_ida, umem->id);
 		kfree(umem);
 		return ERR_PTR(err);
 	}
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP
  2019-01-18 13:03 [PATCH bpf-next 0/3] AF_XDP: add socket monitoring support bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 1/3] net: xsk: track AF_XDP sockets on a per-netns list bjorn.topel
  2019-01-18 13:03 ` [PATCH bpf-next 2/3] xsk: add id to umem bjorn.topel
@ 2019-01-18 13:03 ` bjorn.topel
  2019-01-23 13:19   ` Daniel Borkmann
  2 siblings, 1 reply; 8+ messages in thread
From: bjorn.topel @ 2019-01-18 13:03 UTC (permalink / raw)
  To: ast, daniel, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson

From: Björn Töpel <bjorn.topel@intel.com>

This patch adds the sock_diag interface for querying sockets from user
space. Tools like iproute2 ss(8) can use this interface to list open
AF_XDP sockets.

The user-space ABI is defined in linux/xdp_diag.h and includes netlink
request and response structs. The request can query sockets and the
response contains socket information about the rings, umems, inode and
more.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
 include/uapi/linux/xdp_diag.h |  72 +++++++++++++
 net/xdp/Kconfig               |   8 ++
 net/xdp/Makefile              |   1 +
 net/xdp/xsk.c                 |   6 +-
 net/xdp/xsk.h                 |  12 +++
 net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
 6 files changed, 286 insertions(+), 5 deletions(-)
 create mode 100644 include/uapi/linux/xdp_diag.h
 create mode 100644 net/xdp/xsk.h
 create mode 100644 net/xdp/xsk_diag.c

diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
new file mode 100644
index 000000000000..efe8ce281dce
--- /dev/null
+++ b/include/uapi/linux/xdp_diag.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * xdp_diag: interface for query/monitor XDP sockets
+ * Copyright(c) 2019 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_DIAG_H
+#define _LINUX_XDP_DIAG_H
+
+#include <linux/types.h>
+
+struct xdp_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	xdiag_ino;
+	__u32	xdiag_show;
+	__u32	xdiag_cookie[2];
+};
+
+struct xdp_diag_msg {
+	__u8	xdiag_family;
+	__u8	xdiag_type;
+	__u32	xdiag_ino;
+	__u32	xdiag_cookie[2];
+};
+
+#define XDP_SHOW_INFO		(1 << 0) /* Basic information */
+#define XDP_SHOW_RING_CFG	(1 << 1)
+#define XDP_SHOW_UMEM		(1 << 2)
+#define XDP_SHOW_MEMINFO	(1 << 3)
+
+enum {
+	XDP_DIAG_NONE,
+	XDP_DIAG_INFO,
+	XDP_DIAG_UID,
+	XDP_DIAG_RX_RING,
+	XDP_DIAG_TX_RING,
+	XDP_DIAG_UMEM,
+	XDP_DIAG_UMEM_FILL_RING,
+	XDP_DIAG_UMEM_COMPLETION_RING,
+	XDP_DIAG_MEMINFO,
+	__XDP_DIAG_MAX,
+};
+
+#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
+
+struct xdp_diag_info {
+	__u32	ifindex;
+	__u32	queue_id;
+};
+
+struct xdp_diag_ring {
+	__u32	entries; /*num descs */
+};
+
+#define XDP_DU_F_ZEROCOPY (1 << 0)
+
+struct xdp_diag_umem {
+	__u64	size;
+	__u32	id;
+	__u32	num_pages;
+	__u32	chunk_size;
+	__u32	headroom;
+	__u32	ifindex;
+	__u32	queue_id;
+	__u32	flags;
+	__u32	refs;
+};
+
+
+#endif /* _LINUX_XDP_DIAG_H */
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
index 90e4a7152854..0255b33cff4b 100644
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
@@ -5,3 +5,11 @@ config XDP_SOCKETS
 	help
 	  XDP sockets allows a channel between XDP programs and
 	  userspace applications.
+
+config XDP_SOCKETS_DIAG
+	tristate "XDP sockets: monitoring interface"
+	depends on XDP_SOCKETS
+	default n
+	help
+	  Support for PF_XDP sockets monitoring interface used by the ss tool.
+	  If unsure, say Y.
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 04f073146256..59dbfdf93dca 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 80ca48cefc42..949d3bbccb2f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -27,14 +27,10 @@
 
 #include "xsk_queue.h"
 #include "xdp_umem.h"
+#include "xsk.h"
 
 #define TX_BATCH_SIZE 16
 
-static struct xdp_sock *xdp_sk(struct sock *sk)
-{
-	return (struct xdp_sock *)sk;
-}
-
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return READ_ONCE(xs->rx) &&  READ_ONCE(xs->umem) &&
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
new file mode 100644
index 000000000000..ba8120610426
--- /dev/null
+++ b/net/xdp/xsk.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifndef XSK_H_
+#define XSK_H_
+
+static inline struct xdp_sock *xdp_sk(struct sock *sk)
+{
+	return (struct xdp_sock *)sk;
+}
+
+#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
new file mode 100644
index 000000000000..2585d7a4ac2f
--- /dev/null
+++ b/net/xdp/xsk_diag.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets monitoring support
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ * Author: Björn Töpel <bjorn.topel@intel.com>
+ */
+
+#include <linux/module.h>
+#include <net/xdp_sock.h>
+#include <linux/xdp_diag.h>
+#include <linux/sock_diag.h>
+
+#include "xsk_queue.h"
+#include "xsk.h"
+
+static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_diag_info di;
+
+	di.ifindex = xs->dev ? xs->dev->ifindex : 0;
+	di.queue_id = xs->queue_id;
+	return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
+}
+
+static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
+			     struct sk_buff *nlskb)
+{
+	struct xdp_diag_ring dr;
+
+	dr.entries = queue->nentries;
+	return nla_put(nlskb, nl_type, sizeof(dr), &dr);
+}
+
+static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
+				  struct sk_buff *nlskb)
+{
+	int err = 0;
+
+	if (xs->rx)
+		err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
+	if (!err && xs->tx)
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
+	return err;
+}
+
+static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_umem *umem = xs->umem;
+	struct xdp_diag_umem du;
+	int err;
+
+	if (!umem)
+		return 0;
+
+	du.id = umem->id;
+	du.size = umem->size;
+	du.num_pages = umem->npgs;
+	du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+	du.headroom = umem->headroom;
+	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
+	du.queue_id = umem->queue_id;
+	du.flags = 0;
+	if (umem->zc)
+		du.flags |= XDP_DU_F_ZEROCOPY;
+	du.refs = refcount_read(&umem->users);
+
+	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
+
+	if (!err && umem->fq)
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+	if (!err && umem->cq) {
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+					nlskb);
+	}
+	return err;
+}
+
+static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+			 struct xdp_diag_req *req,
+			 struct user_namespace *user_ns,
+			 u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+	struct xdp_diag_msg *msg;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	msg = nlmsg_data(nlh);
+	msg->xdiag_family = AF_XDP;
+	msg->xdiag_type = sk->sk_type;
+	msg->xdiag_ino = sk_ino;
+	sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+	if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_INFO) &&
+	    nla_put_u32(nlskb, XDP_DIAG_UID,
+			from_kuid_munged(user_ns, sock_i_uid(sk))))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
+	    xsk_diag_put_rings_cfg(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_UMEM) &&
+	    xsk_diag_put_umem(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
+		goto out_nlmsg_trim;
+
+	nlmsg_end(nlskb, nlh);
+	return 0;
+
+out_nlmsg_trim:
+	nlmsg_cancel(nlskb, nlh);
+	return -EMSGSIZE;
+}
+
+static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
+{
+	struct xdp_diag_req *req = nlmsg_data(cb->nlh);
+	struct net *net = sock_net(nlskb->sk);
+	int num = 0, s_num = cb->args[0];
+	struct sock *sk;
+
+	mutex_lock(&net->xdp.lock);
+
+	sk_for_each(sk, &net->xdp.list) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (num++ < s_num)
+			continue;
+
+		if (xsk_diag_fill(sk, nlskb, req,
+				  sk_user_ns(NETLINK_CB(cb->skb).sk),
+				  NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				  sock_i_ino(sk)) < 0) {
+			num--;
+			break;
+		}
+	}
+
+	mutex_unlock(&net->xdp.lock);
+	cb->args[0] = num;
+	return nlskb->len;
+}
+
+static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
+{
+	struct netlink_dump_control c = { .dump = xsk_diag_dump };
+	int hdrlen = sizeof(struct xdp_diag_req);
+	struct net *net = sock_net(nlskb->sk);
+	struct xdp_diag_req *req;
+
+	if (nlmsg_len(hdr) < hdrlen)
+		return -EINVAL;
+
+	if (!(hdr->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	req = nlmsg_data(hdr);
+	return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
+}
+
+static const struct sock_diag_handler xsk_diag_handler = {
+	.family = AF_XDP,
+	.dump = xsk_diag_handler_dump,
+};
+
+static int __init xsk_diag_init(void)
+{
+	return sock_diag_register(&xsk_diag_handler);
+}
+
+static void __exit xsk_diag_exit(void)
+{
+	sock_diag_unregister(&xsk_diag_handler);
+}
+
+module_init(xsk_diag_init);
+module_exit(xsk_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP
  2019-01-18 13:03 ` [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP bjorn.topel
@ 2019-01-23 13:19   ` Daniel Borkmann
  2019-01-23 14:24     ` Björn Töpel
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Borkmann @ 2019-01-23 13:19 UTC (permalink / raw)
  To: bjorn.topel, ast, netdev
  Cc: Björn Töpel, magnus.karlsson, magnus.karlsson

On 01/18/2019 02:03 PM, bjorn.topel@gmail.com wrote:
> From: Björn Töpel <bjorn.topel@intel.com>
> 
> This patch adds the sock_diag interface for querying sockets from user
> space. Tools like iproute2 ss(8) can use this interface to list open
> AF_XDP sockets.
> 
> The user-space ABI is defined in linux/xdp_diag.h and includes netlink
> request and response structs. The request can query sockets and the
> response contains socket information about the rings, umems, inode and
> more.
> 
> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>

Series looks good, few minor nits inline:

> ---
>  include/uapi/linux/xdp_diag.h |  72 +++++++++++++
>  net/xdp/Kconfig               |   8 ++
>  net/xdp/Makefile              |   1 +
>  net/xdp/xsk.c                 |   6 +-
>  net/xdp/xsk.h                 |  12 +++
>  net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
>  6 files changed, 286 insertions(+), 5 deletions(-)
>  create mode 100644 include/uapi/linux/xdp_diag.h
>  create mode 100644 net/xdp/xsk.h
>  create mode 100644 net/xdp/xsk_diag.c
> 
> diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
> new file mode 100644
> index 000000000000..efe8ce281dce
> --- /dev/null
> +++ b/include/uapi/linux/xdp_diag.h
> @@ -0,0 +1,72 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * xdp_diag: interface for query/monitor XDP sockets
> + * Copyright(c) 2019 Intel Corporation.
> + */
> +
> +#ifndef _LINUX_XDP_DIAG_H
> +#define _LINUX_XDP_DIAG_H
> +
> +#include <linux/types.h>
> +
> +struct xdp_diag_req {
> +	__u8	sdiag_family;
> +	__u8	sdiag_protocol;
> +	__u16	pad;

Presumably this one is for future use? Maybe better as '__u16 :16;' to
avoid compile errors if someone tries to zero 'pad' member manually?

> +	__u32	xdiag_ino;
> +	__u32	xdiag_show;
> +	__u32	xdiag_cookie[2];
> +};
> +
> +struct xdp_diag_msg {
> +	__u8	xdiag_family;
> +	__u8	xdiag_type;
> +	__u32	xdiag_ino;
> +	__u32	xdiag_cookie[2];
> +};
> +
> +#define XDP_SHOW_INFO		(1 << 0) /* Basic information */
> +#define XDP_SHOW_RING_CFG	(1 << 1)
> +#define XDP_SHOW_UMEM		(1 << 2)
> +#define XDP_SHOW_MEMINFO	(1 << 3)
> +
> +enum {
> +	XDP_DIAG_NONE,
> +	XDP_DIAG_INFO,
> +	XDP_DIAG_UID,
> +	XDP_DIAG_RX_RING,
> +	XDP_DIAG_TX_RING,
> +	XDP_DIAG_UMEM,
> +	XDP_DIAG_UMEM_FILL_RING,
> +	XDP_DIAG_UMEM_COMPLETION_RING,
> +	XDP_DIAG_MEMINFO,
> +	__XDP_DIAG_MAX,
> +};
> +
> +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
> +
> +struct xdp_diag_info {
> +	__u32	ifindex;
> +	__u32	queue_id;
> +};
> +
> +struct xdp_diag_ring {
> +	__u32	entries; /*num descs */
> +};
> +
> +#define XDP_DU_F_ZEROCOPY (1 << 0)
> +
> +struct xdp_diag_umem {
> +	__u64	size;
> +	__u32	id;
> +	__u32	num_pages;
> +	__u32	chunk_size;
> +	__u32	headroom;
> +	__u32	ifindex;
> +	__u32	queue_id;
> +	__u32	flags;
> +	__u32	refs;
> +};
> +
> +

Nit: one newline too much

> +#endif /* _LINUX_XDP_DIAG_H */
> diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
> index 90e4a7152854..0255b33cff4b 100644
> --- a/net/xdp/Kconfig
> +++ b/net/xdp/Kconfig
> @@ -5,3 +5,11 @@ config XDP_SOCKETS
>  	help
>  	  XDP sockets allows a channel between XDP programs and
>  	  userspace applications.
> +
> +config XDP_SOCKETS_DIAG
> +	tristate "XDP sockets: monitoring interface"
> +	depends on XDP_SOCKETS
> +	default n
> +	help
> +	  Support for PF_XDP sockets monitoring interface used by the ss tool.
> +	  If unsure, say Y.
> diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> index 04f073146256..59dbfdf93dca 100644
> --- a/net/xdp/Makefile
> +++ b/net/xdp/Makefile
> @@ -1 +1,2 @@
>  obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
> +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 80ca48cefc42..949d3bbccb2f 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -27,14 +27,10 @@
>  
>  #include "xsk_queue.h"
>  #include "xdp_umem.h"
> +#include "xsk.h"
>  
>  #define TX_BATCH_SIZE 16
>  
> -static struct xdp_sock *xdp_sk(struct sock *sk)
> -{
> -	return (struct xdp_sock *)sk;
> -}
> -
>  bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
>  {
>  	return READ_ONCE(xs->rx) &&  READ_ONCE(xs->umem) &&
> diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
> new file mode 100644
> index 000000000000..ba8120610426
> --- /dev/null
> +++ b/net/xdp/xsk.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright(c) 2019 Intel Corporation. */
> +
> +#ifndef XSK_H_
> +#define XSK_H_
> +
> +static inline struct xdp_sock *xdp_sk(struct sock *sk)
> +{
> +	return (struct xdp_sock *)sk;
> +}
> +
> +#endif /* XSK_H_ */
> diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
> new file mode 100644
> index 000000000000..2585d7a4ac2f
> --- /dev/null
> +++ b/net/xdp/xsk_diag.c
> @@ -0,0 +1,192 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* XDP sockets monitoring support
> + *
> + * Copyright(c) 2019 Intel Corporation.
> + *
> + * Author: Björn Töpel <bjorn.topel@intel.com>
> + */
> +
> +#include <linux/module.h>
> +#include <net/xdp_sock.h>
> +#include <linux/xdp_diag.h>
> +#include <linux/sock_diag.h>
> +
> +#include "xsk_queue.h"
> +#include "xsk.h"
> +
> +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
> +{
> +	struct xdp_diag_info di;

Not a bug, but can we generally zero all the structs on stack such that once
we extend them nothing gets potentially leaked.

> +	di.ifindex = xs->dev ? xs->dev->ifindex : 0;
> +	di.queue_id = xs->queue_id;
> +	return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
> +}
> +
> +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
> +			     struct sk_buff *nlskb)
> +{
> +	struct xdp_diag_ring dr;

Ditto.

> +	dr.entries = queue->nentries;
> +	return nla_put(nlskb, nl_type, sizeof(dr), &dr);
> +}
> +
> +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
> +				  struct sk_buff *nlskb)
> +{
> +	int err = 0;
> +
> +	if (xs->rx)
> +		err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
> +	if (!err && xs->tx)
> +		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
> +	return err;
> +}
> +
> +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
> +{
> +	struct xdp_umem *umem = xs->umem;
> +	struct xdp_diag_umem du;

Ditto

> +	int err;
> +
> +	if (!umem)
> +		return 0;
> +
> +	du.id = umem->id;
> +	du.size = umem->size;
> +	du.num_pages = umem->npgs;
> +	du.chunk_size = (__u32)(~umem->chunk_mask + 1);
> +	du.headroom = umem->headroom;
> +	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
> +	du.queue_id = umem->queue_id;
> +	du.flags = 0;
> +	if (umem->zc)
> +		du.flags |= XDP_DU_F_ZEROCOPY;
> +	du.refs = refcount_read(&umem->users);
> +
> +	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
> +
> +	if (!err && umem->fq)
> +		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
> +	if (!err && umem->cq) {
> +		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
> +					nlskb);
> +	}
> +	return err;
> +}
> +
> +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
> +			 struct xdp_diag_req *req,
> +			 struct user_namespace *user_ns,
> +			 u32 portid, u32 seq, u32 flags, int sk_ino)
> +{
> +	struct xdp_sock *xs = xdp_sk(sk);
> +	struct xdp_diag_msg *msg;
> +	struct nlmsghdr *nlh;
> +
> +	nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
> +			flags);
> +	if (!nlh)
> +		return -EMSGSIZE;
> +
> +	msg = nlmsg_data(nlh);
> +	msg->xdiag_family = AF_XDP;
> +	msg->xdiag_type = sk->sk_type;
> +	msg->xdiag_ino = sk_ino;
> +	sock_diag_save_cookie(sk, msg->xdiag_cookie);

Don't we have a hole in struct xdp_diag_msg after xdiag_type? Probably better
to memset everything in the beginning as well.

> +	if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
> +		goto out_nlmsg_trim;
> +
> +	if ((req->xdiag_show & XDP_SHOW_INFO) &&
> +	    nla_put_u32(nlskb, XDP_DIAG_UID,
> +			from_kuid_munged(user_ns, sock_i_uid(sk))))
> +		goto out_nlmsg_trim;
> +
> +	if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
> +	    xsk_diag_put_rings_cfg(xs, nlskb))
> +		goto out_nlmsg_trim;
> +
> +	if ((req->xdiag_show & XDP_SHOW_UMEM) &&
> +	    xsk_diag_put_umem(xs, nlskb))
> +		goto out_nlmsg_trim;
> +
> +	if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
> +	    sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
> +		goto out_nlmsg_trim;
> +
> +	nlmsg_end(nlskb, nlh);
> +	return 0;
> +
> +out_nlmsg_trim:
> +	nlmsg_cancel(nlskb, nlh);
> +	return -EMSGSIZE;
> +}
> +
> +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
> +{
> +	struct xdp_diag_req *req = nlmsg_data(cb->nlh);
> +	struct net *net = sock_net(nlskb->sk);
> +	int num = 0, s_num = cb->args[0];
> +	struct sock *sk;
> +
> +	mutex_lock(&net->xdp.lock);
> +
> +	sk_for_each(sk, &net->xdp.list) {
> +		if (!net_eq(sock_net(sk), net))
> +			continue;
> +		if (num++ < s_num)
> +			continue;
> +
> +		if (xsk_diag_fill(sk, nlskb, req,
> +				  sk_user_ns(NETLINK_CB(cb->skb).sk),
> +				  NETLINK_CB(cb->skb).portid,
> +				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
> +				  sock_i_ino(sk)) < 0) {
> +			num--;
> +			break;
> +		}
> +	}
> +
> +	mutex_unlock(&net->xdp.lock);
> +	cb->args[0] = num;
> +	return nlskb->len;
> +}
> +
> +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
> +{
> +	struct netlink_dump_control c = { .dump = xsk_diag_dump };
> +	int hdrlen = sizeof(struct xdp_diag_req);
> +	struct net *net = sock_net(nlskb->sk);
> +	struct xdp_diag_req *req;
> +
> +	if (nlmsg_len(hdr) < hdrlen)
> +		return -EINVAL;
> +
> +	if (!(hdr->nlmsg_flags & NLM_F_DUMP))
> +		return -EOPNOTSUPP;
> +
> +	req = nlmsg_data(hdr);
> +	return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
> +}
> +
> +static const struct sock_diag_handler xsk_diag_handler = {
> +	.family = AF_XDP,
> +	.dump = xsk_diag_handler_dump,
> +};
> +
> +static int __init xsk_diag_init(void)
> +{
> +	return sock_diag_register(&xsk_diag_handler);
> +}
> +
> +static void __exit xsk_diag_exit(void)
> +{
> +	sock_diag_unregister(&xsk_diag_handler);
> +}
> +
> +module_init(xsk_diag_init);
> +module_exit(xsk_diag_exit);
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP
  2019-01-23 13:19   ` Daniel Borkmann
@ 2019-01-23 14:24     ` Björn Töpel
  2019-01-24 13:08       ` Daniel Borkmann
  0 siblings, 1 reply; 8+ messages in thread
From: Björn Töpel @ 2019-01-23 14:24 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: ast, Netdev, Björn Töpel, Karlsson, Magnus, Magnus Karlsson

Den ons 23 jan. 2019 kl 14:19 skrev Daniel Borkmann <daniel@iogearbox.net>:
>
> On 01/18/2019 02:03 PM, bjorn.topel@gmail.com wrote:
> > From: Björn Töpel <bjorn.topel@intel.com>
> >
> > This patch adds the sock_diag interface for querying sockets from user
> > space. Tools like iproute2 ss(8) can use this interface to list open
> > AF_XDP sockets.
> >
> > The user-space ABI is defined in linux/xdp_diag.h and includes netlink
> > request and response structs. The request can query sockets and the
> > response contains socket information about the rings, umems, inode and
> > more.
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
>
> Series looks good, few minor nits inline:
>
> > ---
> >  include/uapi/linux/xdp_diag.h |  72 +++++++++++++
> >  net/xdp/Kconfig               |   8 ++
> >  net/xdp/Makefile              |   1 +
> >  net/xdp/xsk.c                 |   6 +-
> >  net/xdp/xsk.h                 |  12 +++
> >  net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
> >  6 files changed, 286 insertions(+), 5 deletions(-)
> >  create mode 100644 include/uapi/linux/xdp_diag.h
> >  create mode 100644 net/xdp/xsk.h
> >  create mode 100644 net/xdp/xsk_diag.c
> >
> > diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
> > new file mode 100644
> > index 000000000000..efe8ce281dce
> > --- /dev/null
> > +++ b/include/uapi/linux/xdp_diag.h
> > @@ -0,0 +1,72 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +/*
> > + * xdp_diag: interface for query/monitor XDP sockets
> > + * Copyright(c) 2019 Intel Corporation.
> > + */
> > +
> > +#ifndef _LINUX_XDP_DIAG_H
> > +#define _LINUX_XDP_DIAG_H
> > +
> > +#include <linux/types.h>
> > +
> > +struct xdp_diag_req {
> > +     __u8    sdiag_family;
> > +     __u8    sdiag_protocol;
> > +     __u16   pad;
>
> Presumably this one is for future use? Maybe better as '__u16 :16;' to
> avoid compile errors if someone tries to zero 'pad' member manually?
>

The "pad" was there simply to have an explicitly named structure hole.
I'm not following the bitfield argument. Why does that avoid compiler
errors?

> > +     __u32   xdiag_ino;
> > +     __u32   xdiag_show;
> > +     __u32   xdiag_cookie[2];
> > +};
> > +
> > +struct xdp_diag_msg {
> > +     __u8    xdiag_family;
> > +     __u8    xdiag_type;
> > +     __u32   xdiag_ino;
> > +     __u32   xdiag_cookie[2];
> > +};
> > +
> > +#define XDP_SHOW_INFO                (1 << 0) /* Basic information */
> > +#define XDP_SHOW_RING_CFG    (1 << 1)
> > +#define XDP_SHOW_UMEM                (1 << 2)
> > +#define XDP_SHOW_MEMINFO     (1 << 3)
> > +
> > +enum {
> > +     XDP_DIAG_NONE,
> > +     XDP_DIAG_INFO,
> > +     XDP_DIAG_UID,
> > +     XDP_DIAG_RX_RING,
> > +     XDP_DIAG_TX_RING,
> > +     XDP_DIAG_UMEM,
> > +     XDP_DIAG_UMEM_FILL_RING,
> > +     XDP_DIAG_UMEM_COMPLETION_RING,
> > +     XDP_DIAG_MEMINFO,
> > +     __XDP_DIAG_MAX,
> > +};
> > +
> > +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
> > +
> > +struct xdp_diag_info {
> > +     __u32   ifindex;
> > +     __u32   queue_id;
> > +};
> > +
> > +struct xdp_diag_ring {
> > +     __u32   entries; /*num descs */
> > +};
> > +
> > +#define XDP_DU_F_ZEROCOPY (1 << 0)
> > +
> > +struct xdp_diag_umem {
> > +     __u64   size;
> > +     __u32   id;
> > +     __u32   num_pages;
> > +     __u32   chunk_size;
> > +     __u32   headroom;
> > +     __u32   ifindex;
> > +     __u32   queue_id;
> > +     __u32   flags;
> > +     __u32   refs;
> > +};
> > +
> > +
>
> Nit: one newline too much
>

Good catch!

> > +#endif /* _LINUX_XDP_DIAG_H */
> > diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
> > index 90e4a7152854..0255b33cff4b 100644
> > --- a/net/xdp/Kconfig
> > +++ b/net/xdp/Kconfig
> > @@ -5,3 +5,11 @@ config XDP_SOCKETS
> >       help
> >         XDP sockets allows a channel between XDP programs and
> >         userspace applications.
> > +
> > +config XDP_SOCKETS_DIAG
> > +     tristate "XDP sockets: monitoring interface"
> > +     depends on XDP_SOCKETS
> > +     default n
> > +     help
> > +       Support for PF_XDP sockets monitoring interface used by the ss tool.
> > +       If unsure, say Y.
> > diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> > index 04f073146256..59dbfdf93dca 100644
> > --- a/net/xdp/Makefile
> > +++ b/net/xdp/Makefile
> > @@ -1 +1,2 @@
> >  obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
> > +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index 80ca48cefc42..949d3bbccb2f 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -27,14 +27,10 @@
> >
> >  #include "xsk_queue.h"
> >  #include "xdp_umem.h"
> > +#include "xsk.h"
> >
> >  #define TX_BATCH_SIZE 16
> >
> > -static struct xdp_sock *xdp_sk(struct sock *sk)
> > -{
> > -     return (struct xdp_sock *)sk;
> > -}
> > -
> >  bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
> >  {
> >       return READ_ONCE(xs->rx) &&  READ_ONCE(xs->umem) &&
> > diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
> > new file mode 100644
> > index 000000000000..ba8120610426
> > --- /dev/null
> > +++ b/net/xdp/xsk.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright(c) 2019 Intel Corporation. */
> > +
> > +#ifndef XSK_H_
> > +#define XSK_H_
> > +
> > +static inline struct xdp_sock *xdp_sk(struct sock *sk)
> > +{
> > +     return (struct xdp_sock *)sk;
> > +}
> > +
> > +#endif /* XSK_H_ */
> > diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
> > new file mode 100644
> > index 000000000000..2585d7a4ac2f
> > --- /dev/null
> > +++ b/net/xdp/xsk_diag.c
> > @@ -0,0 +1,192 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* XDP sockets monitoring support
> > + *
> > + * Copyright(c) 2019 Intel Corporation.
> > + *
> > + * Author: Björn Töpel <bjorn.topel@intel.com>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <net/xdp_sock.h>
> > +#include <linux/xdp_diag.h>
> > +#include <linux/sock_diag.h>
> > +
> > +#include "xsk_queue.h"
> > +#include "xsk.h"
> > +
> > +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
> > +{
> > +     struct xdp_diag_info di;
>
> Not a bug, but can we generally zero all the structs on stack such that once
> we extend them nothing gets potentially leaked.
>

Makes sense! Will fix here and...

> > +     di.ifindex = xs->dev ? xs->dev->ifindex : 0;
> > +     di.queue_id = xs->queue_id;
> > +     return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
> > +}
> > +
> > +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
> > +                          struct sk_buff *nlskb)
> > +{
> > +     struct xdp_diag_ring dr;
>
> Ditto.
>

...here...

> > +     dr.entries = queue->nentries;
> > +     return nla_put(nlskb, nl_type, sizeof(dr), &dr);
> > +}
> > +
> > +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
> > +                               struct sk_buff *nlskb)
> > +{
> > +     int err = 0;
> > +
> > +     if (xs->rx)
> > +             err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
> > +     if (!err && xs->tx)
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
> > +     return err;
> > +}
> > +
> > +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
> > +{
> > +     struct xdp_umem *umem = xs->umem;
> > +     struct xdp_diag_umem du;
>
> Ditto
>

...and here.

> > +     int err;
> > +
> > +     if (!umem)
> > +             return 0;
> > +
> > +     du.id = umem->id;
> > +     du.size = umem->size;
> > +     du.num_pages = umem->npgs;
> > +     du.chunk_size = (__u32)(~umem->chunk_mask + 1);
> > +     du.headroom = umem->headroom;
> > +     du.ifindex = umem->dev ? umem->dev->ifindex : 0;
> > +     du.queue_id = umem->queue_id;
> > +     du.flags = 0;
> > +     if (umem->zc)
> > +             du.flags |= XDP_DU_F_ZEROCOPY;
> > +     du.refs = refcount_read(&umem->users);
> > +
> > +     err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
> > +
> > +     if (!err && umem->fq)
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
> > +     if (!err && umem->cq) {
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
> > +                                     nlskb);
> > +     }
> > +     return err;
> > +}
> > +
> > +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
> > +                      struct xdp_diag_req *req,
> > +                      struct user_namespace *user_ns,
> > +                      u32 portid, u32 seq, u32 flags, int sk_ino)
> > +{
> > +     struct xdp_sock *xs = xdp_sk(sk);
> > +     struct xdp_diag_msg *msg;
> > +     struct nlmsghdr *nlh;
> > +
> > +     nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
> > +                     flags);
> > +     if (!nlh)
> > +             return -EMSGSIZE;
> > +
> > +     msg = nlmsg_data(nlh);
> > +     msg->xdiag_family = AF_XDP;
> > +     msg->xdiag_type = sk->sk_type;
> > +     msg->xdiag_ino = sk_ino;
> > +     sock_diag_save_cookie(sk, msg->xdiag_cookie);
>
> Don't we have a hole in struct xdp_diag_msg after xdiag_type? Probably better
> to memset everything in the beginning as well.
>

You're right! Good catch. In general for uapi structures; Explicitly
named "holes" or not? The pad in req is exactly that. So, either add a
pad to _msg or remove in _req. What is preferred?

I'll spin up a v2.


Thanks for looking at the code!
Björn

> > +     if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_INFO) &&
> > +         nla_put_u32(nlskb, XDP_DIAG_UID,
> > +                     from_kuid_munged(user_ns, sock_i_uid(sk))))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
> > +         xsk_diag_put_rings_cfg(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_UMEM) &&
> > +         xsk_diag_put_umem(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
> > +         sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
> > +             goto out_nlmsg_trim;
> > +
> > +     nlmsg_end(nlskb, nlh);
> > +     return 0;
> > +
> > +out_nlmsg_trim:
> > +     nlmsg_cancel(nlskb, nlh);
> > +     return -EMSGSIZE;
> > +}
> > +
> > +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
> > +{
> > +     struct xdp_diag_req *req = nlmsg_data(cb->nlh);
> > +     struct net *net = sock_net(nlskb->sk);
> > +     int num = 0, s_num = cb->args[0];
> > +     struct sock *sk;
> > +
> > +     mutex_lock(&net->xdp.lock);
> > +
> > +     sk_for_each(sk, &net->xdp.list) {
> > +             if (!net_eq(sock_net(sk), net))
> > +                     continue;
> > +             if (num++ < s_num)
> > +                     continue;
> > +
> > +             if (xsk_diag_fill(sk, nlskb, req,
> > +                               sk_user_ns(NETLINK_CB(cb->skb).sk),
> > +                               NETLINK_CB(cb->skb).portid,
> > +                               cb->nlh->nlmsg_seq, NLM_F_MULTI,
> > +                               sock_i_ino(sk)) < 0) {
> > +                     num--;
> > +                     break;
> > +             }
> > +     }
> > +
> > +     mutex_unlock(&net->xdp.lock);
> > +     cb->args[0] = num;
> > +     return nlskb->len;
> > +}
> > +
> > +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
> > +{
> > +     struct netlink_dump_control c = { .dump = xsk_diag_dump };
> > +     int hdrlen = sizeof(struct xdp_diag_req);
> > +     struct net *net = sock_net(nlskb->sk);
> > +     struct xdp_diag_req *req;
> > +
> > +     if (nlmsg_len(hdr) < hdrlen)
> > +             return -EINVAL;
> > +
> > +     if (!(hdr->nlmsg_flags & NLM_F_DUMP))
> > +             return -EOPNOTSUPP;
> > +
> > +     req = nlmsg_data(hdr);
> > +     return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
> > +}
> > +
> > +static const struct sock_diag_handler xsk_diag_handler = {
> > +     .family = AF_XDP,
> > +     .dump = xsk_diag_handler_dump,
> > +};
> > +
> > +static int __init xsk_diag_init(void)
> > +{
> > +     return sock_diag_register(&xsk_diag_handler);
> > +}
> > +
> > +static void __exit xsk_diag_exit(void)
> > +{
> > +     sock_diag_unregister(&xsk_diag_handler);
> > +}
> > +
> > +module_init(xsk_diag_init);
> > +module_exit(xsk_diag_exit);
> > +MODULE_LICENSE("GPL");
> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
> >
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP
  2019-01-23 14:24     ` Björn Töpel
@ 2019-01-24 13:08       ` Daniel Borkmann
  2019-01-24 13:15         ` Björn Töpel
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Borkmann @ 2019-01-24 13:08 UTC (permalink / raw)
  To: Björn Töpel
  Cc: ast, Netdev, Björn Töpel, Karlsson, Magnus, Magnus Karlsson

On 01/23/2019 03:24 PM, Björn Töpel wrote:
> Den ons 23 jan. 2019 kl 14:19 skrev Daniel Borkmann <daniel@iogearbox.net>:
>>
>> On 01/18/2019 02:03 PM, bjorn.topel@gmail.com wrote:
>>> From: Björn Töpel <bjorn.topel@intel.com>
>>>
>>> This patch adds the sock_diag interface for querying sockets from user
>>> space. Tools like iproute2 ss(8) can use this interface to list open
>>> AF_XDP sockets.
>>>
>>> The user-space ABI is defined in linux/xdp_diag.h and includes netlink
>>> request and response structs. The request can query sockets and the
>>> response contains socket information about the rings, umems, inode and
>>> more.
>>>
>>> Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
>>
>> Series looks good, few minor nits inline:
>>
>>> ---
>>>  include/uapi/linux/xdp_diag.h |  72 +++++++++++++
>>>  net/xdp/Kconfig               |   8 ++
>>>  net/xdp/Makefile              |   1 +
>>>  net/xdp/xsk.c                 |   6 +-
>>>  net/xdp/xsk.h                 |  12 +++
>>>  net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
>>>  6 files changed, 286 insertions(+), 5 deletions(-)
>>>  create mode 100644 include/uapi/linux/xdp_diag.h
>>>  create mode 100644 net/xdp/xsk.h
>>>  create mode 100644 net/xdp/xsk_diag.c
>>>
>>> diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
>>> new file mode 100644
>>> index 000000000000..efe8ce281dce
>>> --- /dev/null
>>> +++ b/include/uapi/linux/xdp_diag.h
>>> @@ -0,0 +1,72 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>>> +/*
>>> + * xdp_diag: interface for query/monitor XDP sockets
>>> + * Copyright(c) 2019 Intel Corporation.
>>> + */
>>> +
>>> +#ifndef _LINUX_XDP_DIAG_H
>>> +#define _LINUX_XDP_DIAG_H
>>> +
>>> +#include <linux/types.h>
>>> +
>>> +struct xdp_diag_req {
>>> +     __u8    sdiag_family;
>>> +     __u8    sdiag_protocol;
>>> +     __u16   pad;
>>
>> Presumably this one is for future use? Maybe better as '__u16 :16;' to
>> avoid compile errors if someone tries to zero 'pad' member manually?
> 
> The "pad" was there simply to have an explicitly named structure hole.
> I'm not following the bitfield argument. Why does that avoid compiler
> errors?

Mostly in the sense that an application would explicitly set 'pad = 0'
whereas pad could later on potentially be renamed and reused otherwise
(which __u16 :16 would avoid in first place). But looking at other
*_diag_req structs, it's explicitly named as 'pad' elsewhere already,
then nevermind, lets have it rather consistent then and keep as is.

One small thing I still spotted when looking at it again, in function
xsk_diag_handler_dump() the req is unused.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP
  2019-01-24 13:08       ` Daniel Borkmann
@ 2019-01-24 13:15         ` Björn Töpel
  0 siblings, 0 replies; 8+ messages in thread
From: Björn Töpel @ 2019-01-24 13:15 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: ast, Netdev, Björn Töpel, Karlsson, Magnus, Magnus Karlsson

Den tors 24 jan. 2019 kl 14:08 skrev Daniel Borkmann <daniel@iogearbox.net>:
>
[...]
> >>> +#ifndef _LINUX_XDP_DIAG_H
> >>> +#define _LINUX_XDP_DIAG_H
> >>> +
> >>> +#include <linux/types.h>
> >>> +
> >>> +struct xdp_diag_req {
> >>> +     __u8    sdiag_family;
> >>> +     __u8    sdiag_protocol;
> >>> +     __u16   pad;
> >>
> >> Presumably this one is for future use? Maybe better as '__u16 :16;' to
> >> avoid compile errors if someone tries to zero 'pad' member manually?
> >
> > The "pad" was there simply to have an explicitly named structure hole.
> > I'm not following the bitfield argument. Why does that avoid compiler
> > errors?
>
> Mostly in the sense that an application would explicitly set 'pad = 0'
> whereas pad could later on potentially be renamed and reused otherwise
> (which __u16 :16 would avoid in first place). But looking at other
> *_diag_req structs, it's explicitly named as 'pad' elsewhere already,
> then nevermind, lets have it rather consistent then and keep as is.
>

Ok, thanks for clearing it up. I'll address all your comments, and
also adds a "pad" member in msg for consistency.

> One small thing I still spotted when looking at it again, in function
> xsk_diag_handler_dump() the req is unused.
>

Ah, thank you.


Björn

> Thanks,
> Daniel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-01-24 13:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-18 13:03 [PATCH bpf-next 0/3] AF_XDP: add socket monitoring support bjorn.topel
2019-01-18 13:03 ` [PATCH bpf-next 1/3] net: xsk: track AF_XDP sockets on a per-netns list bjorn.topel
2019-01-18 13:03 ` [PATCH bpf-next 2/3] xsk: add id to umem bjorn.topel
2019-01-18 13:03 ` [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP bjorn.topel
2019-01-23 13:19   ` Daniel Borkmann
2019-01-23 14:24     ` Björn Töpel
2019-01-24 13:08       ` Daniel Borkmann
2019-01-24 13:15         ` Björn Töpel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).