* [PATCH RFC net-next v2 01/16] mctp: Add MCTP base
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 02/16] mctp: Add base socket/protocol definitions Jeremy Kerr
` (14 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Add basic Kconfig, an initial (empty) af_mctp source object, and
{AF,PF}_MCTP definitions, and the required selinux definitions.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
- Add Linux-syscall-note to uapi header
- Add selinux defs for new AF_MCTP
- Controller -> component
- Don't use strict cflags; warnings are present in #includes
---
MAINTAINERS | 7 +++++++
include/linux/socket.h | 6 +++++-
include/uapi/linux/mctp.h | 15 +++++++++++++++
net/Kconfig | 1 +
net/Makefile | 1 +
net/mctp/Kconfig | 13 +++++++++++++
net/mctp/Makefile | 3 +++
net/mctp/af_mctp.c | 7 +++++++
security/selinux/hooks.c | 4 +++-
security/selinux/include/classmap.h | 4 +++-
10 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 include/uapi/linux/mctp.h
create mode 100644 net/mctp/Kconfig
create mode 100644 net/mctp/Makefile
create mode 100644 net/mctp/af_mctp.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21901f1..8116fd2a0789 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10869,6 +10869,13 @@ F: drivers/mailbox/arm_mhuv2.c
F: include/linux/mailbox/arm_mhuv2_message.h
F: Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
+M: Jeremy Kerr <jk@codeconstruct.com.au>
+M: Matt Johnston <matt@codeconstruct.com.au>
+L: netdev@vger.kernel.org
+S: Maintained
+F: net/mctp/
+
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M: Michael Kerrisk <mtk.manpages@gmail.com>
L: linux-man@vger.kernel.org
diff --git a/include/linux/socket.h b/include/linux/socket.h
index b8fc5c53ba6f..14286a11589d 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -223,8 +223,11 @@ struct ucred {
* reuses AF_INET address family
*/
#define AF_XDP 44 /* XDP sockets */
+#define AF_MCTP 45 /* Management component
+ * transport protocol
+ */
-#define AF_MAX 45 /* For now.. */
+#define AF_MAX 46 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -274,6 +277,7 @@ struct ucred {
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
#define PF_XDP AF_XDP
+#define PF_MCTP AF_MCTP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
new file mode 100644
index 000000000000..2640a589c14c
--- /dev/null
+++ b/include/uapi/linux/mctp.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __UAPI_MCTP_H
+#define __UAPI_MCTP_H
+
+struct sockaddr_mctp {
+};
+
+#endif /* __UAPI_MCTP_H */
diff --git a/net/Kconfig b/net/Kconfig
index c7392c449b25..fb13460c6dab 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -363,6 +363,7 @@ source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
source "net/strparser/Kconfig"
+source "net/mctp/Kconfig"
config FIB_RULES
bool
diff --git a/net/Makefile b/net/Makefile
index 9ca9572188fe..fbfeb8a0bb37 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
+obj-$(CONFIG_MCTP) += mctp/
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
new file mode 100644
index 000000000000..2cdf3d0a28c9
--- /dev/null
+++ b/net/mctp/Kconfig
@@ -0,0 +1,13 @@
+
+menuconfig MCTP
+ depends on NET
+ tristate "MCTP core protocol support"
+ help
+ Management Component Transport Protocol (MCTP) is an in-system
+ protocol for communicating between management controllers and
+ their managed devices (peripherals, host processors, etc.). The
+ protocol is defined by DMTF specification DSP0236.
+
+ This option enables core MCTP support. For communicating with other
+ devices, you'll want to enable a driver for a specific hardware
+ channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644
index 000000000000..7c056b1b7939
--- /dev/null
+++ b/net/mctp/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644
index 000000000000..1b63b753057b
--- /dev/null
+++ b/net/mctp/af_mctp.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index eaea837d89d1..4de54fbadc2e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1330,7 +1330,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
return SECCLASS_SMC_SOCKET;
case PF_XDP:
return SECCLASS_XDP_SOCKET;
-#if PF_MAX > 45
+ case PF_MCTP:
+ return SECCLASS_MCTP_SOCKET;
+#if PF_MAX > 46
#error New address family defined, please update this function.
#endif
}
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 62d19bccf3de..084757ff4390 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -246,6 +246,8 @@ struct security_class_mapping secclass_map[] = {
NULL } },
{ "xdp_socket",
{ COMMON_SOCK_PERMS, NULL } },
+ { "mctp_socket",
+ { COMMON_SOCK_PERMS, NULL } },
{ "perf_event",
{ "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } },
{ "lockdown",
@@ -255,6 +257,6 @@ struct security_class_mapping secclass_map[] = {
{ NULL }
};
-#if PF_MAX > 45
+#if PF_MAX > 46
#error New address family defined, please update secclass_map.
#endif
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 02/16] mctp: Add base socket/protocol definitions
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 01/16] mctp: Add MCTP base Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 03/16] mctp: Add base packet definitions Jeremy Kerr
` (13 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Add an empty socket implementation, plus initialisation/destruction
handlers.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
- Controller -> component
---
net/mctp/af_mctp.c | 169 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 169 insertions(+)
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 1b63b753057b..b3aeca6486e3 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -5,3 +5,172 @@
* Copyright (c) 2021 Code Construct
* Copyright (c) 2021 Google
*/
+
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/sock.h>
+
+struct mctp_sock {
+ struct sock sk;
+};
+
+static int mctp_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk) {
+ sock->sk = NULL;
+ sk->sk_prot->close(sk, 0);
+ }
+
+ return 0;
+}
+
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+ return 0;
+}
+
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ return 0;
+}
+
+static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ return 0;
+}
+
+static int mctp_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ return -EINVAL;
+}
+
+static int mctp_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ return -EINVAL;
+}
+
+static const struct proto_ops mctp_dgram_ops = {
+ .family = PF_MCTP,
+ .release = mctp_release,
+ .bind = mctp_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = mctp_setsockopt,
+ .getsockopt = mctp_getsockopt,
+ .sendmsg = mctp_sendmsg,
+ .recvmsg = mctp_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static void mctp_sk_close(struct sock *sk, long timeout)
+{
+ sk_common_release(sk);
+}
+
+static struct proto mctp_proto = {
+ .name = "MCTP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct mctp_sock),
+ .close = mctp_sk_close,
+};
+
+static int mctp_pf_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ const struct proto_ops *ops;
+ struct proto *proto;
+ struct sock *sk;
+ int rc;
+
+ if (protocol)
+ return -EPROTONOSUPPORT;
+
+ /* only datagram sockets are supported */
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ proto = &mctp_proto;
+ ops = &mctp_dgram_ops;
+
+ sock->state = SS_UNCONNECTED;
+ sock->ops = ops;
+
+ sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+
+ rc = 0;
+ if (sk->sk_prot->init)
+ rc = sk->sk_prot->init(sk);
+
+ if (rc)
+ goto err_sk_put;
+
+ return 0;
+
+err_sk_put:
+ sock_orphan(sk);
+ sock_put(sk);
+ return rc;
+}
+
+static struct net_proto_family mctp_pf = {
+ .family = PF_MCTP,
+ .create = mctp_pf_create,
+ .owner = THIS_MODULE,
+};
+
+static __init int mctp_init(void)
+{
+ int rc;
+
+ pr_info("mctp: management component transport protocol core\n");
+
+ rc = sock_register(&mctp_pf);
+ if (rc)
+ return rc;
+
+ rc = proto_register(&mctp_proto, 0);
+ if (rc)
+ goto err_unreg_sock;
+
+ return 0;
+
+err_unreg_sock:
+ sock_unregister(PF_MCTP);
+
+ return rc;
+}
+
+static __exit void mctp_exit(void)
+{
+ proto_unregister(&mctp_proto);
+ sock_unregister(PF_MCTP);
+}
+
+module_init(mctp_init);
+module_exit(mctp_exit);
+
+MODULE_DESCRIPTION("MCTP core");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
+
+MODULE_ALIAS_NETPROTO(PF_MCTP);
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 03/16] mctp: Add base packet definitions
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 01/16] mctp: Add MCTP base Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 02/16] mctp: Add base socket/protocol definitions Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 04/16] mctp: Add sockaddr_mctp to uapi Jeremy Kerr
` (12 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Simple packet header format as defined by DMTF DSP0236.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
- Controller -> component
---
MAINTAINERS | 1 +
include/net/mctp.h | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+)
create mode 100644 include/net/mctp.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 8116fd2a0789..8eb957c848dc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10874,6 +10874,7 @@ M: Jeremy Kerr <jk@codeconstruct.com.au>
M: Matt Johnston <matt@codeconstruct.com.au>
L: netdev@vger.kernel.org
S: Maintained
+F: include/net/mctp.h
F: net/mctp/
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
diff --git a/include/net/mctp.h b/include/net/mctp.h
new file mode 100644
index 000000000000..4c01e083be45
--- /dev/null
+++ b/include/net/mctp.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTP_H
+#define __NET_MCTP_H
+
+#include <linux/bits.h>
+
+/* MCTP packet definitions */
+struct mctp_hdr {
+ u8 ver;
+ u8 dest;
+ u8 src;
+ u8 flags_seq_tag;
+};
+
+#define MCTP_VER_MIN 1
+#define MCTP_VER_MAX 1
+
+/* Definitions for flags_seq_tag field */
+#define MCTP_HDR_FLAG_SOM BIT(7)
+#define MCTP_HDR_FLAG_EOM BIT(6)
+#define MCTP_HDR_FLAG_TO BIT(3)
+#define MCTP_HDR_FLAGS GENMASK(5, 3)
+#define MCTP_HDR_SEQ_SHIFT 4
+#define MCTP_HDR_SEQ_MASK GENMASK(1, 0)
+#define MCTP_HDR_TAG_SHIFT 0
+#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+
+#endif /* __NET_MCTP_H */
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 04/16] mctp: Add sockaddr_mctp to uapi
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (2 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 03/16] mctp: Add base packet definitions Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 05/16] mctp: Add initial driver infrastructure Jeremy Kerr
` (11 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
This change introduces the user-visible MCTP header, containing the
protocol-specific addressing definitions.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
v2:
- include MCTP_ADDR_NULL and MCTP_TAG_* definitions
---
include/uapi/linux/mctp.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 2640a589c14c..5c6a2e0c4e59 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -9,7 +9,28 @@
#ifndef __UAPI_MCTP_H
#define __UAPI_MCTP_H
+#include <linux/types.h>
+
+typedef __u8 mctp_eid_t;
+
+struct mctp_addr {
+ mctp_eid_t s_addr;
+};
+
struct sockaddr_mctp {
+ unsigned short int smctp_family;
+ int smctp_network;
+ struct mctp_addr smctp_addr;
+ __u8 smctp_type;
+ __u8 smctp_tag;
};
+#define MCTP_NET_ANY 0x0
+
+#define MCTP_ADDR_NULL 0x00
+#define MCTP_ADDR_ANY 0xff
+
+#define MCTP_TAG_MASK GENMASK(2,0)
+#define MCTP_TAG_OWNER BIT(3)
+
#endif /* __UAPI_MCTP_H */
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 05/16] mctp: Add initial driver infrastructure
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (3 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 04/16] mctp: Add sockaddr_mctp to uapi Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 06/16] mctp: Add device handling and netlink interface Jeremy Kerr
` (10 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Add an empty drivers/net/mctp/, for future interface drivers.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
MAINTAINERS | 1 +
drivers/net/Kconfig | 2 ++
drivers/net/Makefile | 1 +
drivers/net/mctp/Kconfig | 8 ++++++++
drivers/net/mctp/Makefile | 0
include/uapi/linux/if_arp.h | 1 +
6 files changed, 13 insertions(+)
create mode 100644 drivers/net/mctp/Kconfig
create mode 100644 drivers/net/mctp/Makefile
diff --git a/MAINTAINERS b/MAINTAINERS
index 8eb957c848dc..f13ee797154b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10874,6 +10874,7 @@ M: Jeremy Kerr <jk@codeconstruct.com.au>
M: Matt Johnston <matt@codeconstruct.com.au>
L: netdev@vger.kernel.org
S: Maintained
+F: drivers/net/mctp/
F: include/net/mctp.h
F: net/mctp/
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 74dc8e249faa..fd4f15709d5d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -482,6 +482,8 @@ config NET_SB1000
source "drivers/net/phy/Kconfig"
+source "drivers/net/mctp/Kconfig"
+
source "drivers/net/mdio/Kconfig"
source "drivers/net/pcs/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 7ffd2d03efaf..a48a664605a3 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_WAN) += wan/
obj-$(CONFIG_WLAN) += wireless/
obj-$(CONFIG_IEEE802154) += ieee802154/
obj-$(CONFIG_WWAN) += wwan/
+obj-$(CONFIG_MCTP) += mctp/
obj-$(CONFIG_VMXNET3) += vmxnet3/
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
new file mode 100644
index 000000000000..d8f966cedc89
--- /dev/null
+++ b/drivers/net/mctp/Kconfig
@@ -0,0 +1,8 @@
+
+if MCTP
+
+menu "MCTP Device Drivers"
+
+endmenu
+
+endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index c3cc5a9e5eaf..4783af9fe520 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -54,6 +54,7 @@
#define ARPHRD_X25 271 /* CCITT X.25 */
#define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */
#define ARPHRD_CAN 280 /* Controller Area Network */
+#define ARPHRD_MCTP 290
#define ARPHRD_PPP 512
#define ARPHRD_CISCO 513 /* Cisco HDLC */
#define ARPHRD_HDLC ARPHRD_CISCO
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 06/16] mctp: Add device handling and netlink interface
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (4 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 05/16] mctp: Add initial driver infrastructure Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 07/16] mctp: Add initial routing framework Jeremy Kerr
` (9 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
This change adds the infrastructure for managing MCTP netdevices; we add
a pointer to the AF_MCTP-specific data to struct netdevice, and hook up
the rtnetlink operations for adding and removing addresses.
Includes changes from Matt Johnston <matt@codeconstruct.com.au>.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
* controller -> component
---
MAINTAINERS | 1 +
include/linux/netdevice.h | 3 +
include/net/mctp.h | 14 ++
include/net/mctpdevice.h | 41 ++++
include/uapi/linux/if_ether.h | 3 +
include/uapi/linux/if_link.h | 10 +
include/uapi/linux/mctp.h | 1 +
net/mctp/Makefile | 2 +-
net/mctp/af_mctp.c | 8 +
net/mctp/device.c | 367 ++++++++++++++++++++++++++++++++++
10 files changed, 449 insertions(+), 1 deletion(-)
create mode 100644 include/net/mctpdevice.h
create mode 100644 net/mctp/device.c
diff --git a/MAINTAINERS b/MAINTAINERS
index f13ee797154b..407b265b026b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10876,6 +10876,7 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/mctp/
F: include/net/mctp.h
+F: include/net/mctpdevice.h
F: net/mctp/
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5cbc950b34df..a37e119460fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2092,6 +2092,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
struct mpls_dev __rcu *mpls_ptr;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct mctp_dev __rcu *mctp_ptr;
+#endif
/*
* Cache lines mostly used on receive path (including eth_type_trans())
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 4c01e083be45..61452e03aa85 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -10,6 +10,7 @@
#define __NET_MCTP_H
#include <linux/bits.h>
+#include <linux/mctp.h>
/* MCTP packet definitions */
struct mctp_hdr {
@@ -32,4 +33,17 @@ struct mctp_hdr {
#define MCTP_HDR_TAG_SHIFT 0
#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+static inline bool mctp_address_ok(mctp_eid_t eid)
+{
+ return eid >= 8 && eid < 255;
+}
+
+static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
+{
+ return (struct mctp_hdr *)skb_network_header(skb);
+}
+
+void mctp_device_init(void);
+void mctp_device_exit(void);
+
#endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
new file mode 100644
index 000000000000..1f5a30d1e99b
--- /dev/null
+++ b/include/net/mctpdevice.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP) - device
+ * definitions.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTPDEVICE_H
+#define __NET_MCTPDEVICE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+struct mctp_ifaddr {
+ u8 eid;
+
+ struct mctp_dev *dev;
+ struct list_head dev_list;
+
+ struct rcu_head rcu;
+};
+
+struct mctp_dev {
+ struct net_device *dev;
+
+ unsigned int net;
+
+ struct list_head addrs; /* -> mctp_ifaddr.dev_list */
+
+ struct rcu_head rcu;
+};
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+
+#endif /* __NET_MCTPDEVICE_H */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index a0b637911d3c..5f589c7a8382 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -151,6 +151,9 @@
#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and
* aggregation protocol
*/
+#define ETH_P_MCTP 0x00FA /* Management component transport
+ * protocol packets
+ */
/*
* This is an Ethernet frame header.
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cd5b382a4138..a4543ddf98f4 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1251,4 +1251,14 @@ struct ifla_rmnet_flags {
__u32 mask;
};
+/* MCTP section */
+
+enum {
+ IFLA_MCTP_UNSPEC,
+ IFLA_MCTP_NET,
+ __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 5c6a2e0c4e59..7be546e7abe7 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -26,6 +26,7 @@ struct sockaddr_mctp {
};
#define MCTP_NET_ANY 0x0
+#define MCTP_NET_DEFAULT 0x0
#define MCTP_ADDR_NULL 0x00
#define MCTP_ADDR_ANY 0xff
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
index 7c056b1b7939..2ea98c27b262 100644
--- a/net/mctp/Makefile
+++ b/net/mctp/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MCTP) += mctp.o
-mctp-objs := af_mctp.o
+mctp-objs := af_mctp.o device.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index b3aeca6486e3..401b4fa141a5 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -6,13 +6,18 @@
* Copyright (c) 2021 Google
*/
+#include <linux/if_arp.h>
#include <linux/net.h>
#include <linux/mctp.h>
#include <linux/module.h>
#include <linux/socket.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
#include <net/sock.h>
+/* socket implementation */
+
struct mctp_sock {
struct sock sk;
};
@@ -152,6 +157,8 @@ static __init int mctp_init(void)
if (rc)
goto err_unreg_sock;
+ mctp_device_init();
+
return 0;
err_unreg_sock:
@@ -162,6 +169,7 @@ static __init int mctp_init(void)
static __exit void mctp_exit(void)
{
+ mctp_device_exit();
proto_unregister(&mctp_proto);
sock_unregister(PF_MCTP);
}
diff --git a/net/mctp/device.c b/net/mctp/device.c
new file mode 100644
index 000000000000..6ad3367d1209
--- /dev/null
+++ b/net/mctp/device.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - device implementation.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_link.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+
+#include <net/addrconf.h>
+#include <net/netlink.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* unlocked: caller must hold rcu_read_lock */
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
+{
+ return rcu_dereference(dev->mctp_ptr);
+}
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
+{
+ return rtnl_dereference(dev->mctp_ptr);
+}
+
+static void mctp_dev_destroy(struct mctp_dev *mdev)
+{
+ struct net_device *dev = mdev->dev;
+
+ dev_put(dev);
+ kfree_rcu(mdev, rcu);
+}
+
+static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mctp_dev *mdev, struct mctp_ifaddr *ifa)
+{
+ struct ifaddrmsg *hdr;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ifa_family = AF_MCTP;
+ hdr->ifa_prefixlen = 0;
+ hdr->ifa_flags = 0;
+ hdr->ifa_scope = 0;
+ hdr->ifa_index = mdev->dev->ifindex;
+
+ if (nla_put_u8(skb, IFA_LOCAL, ifa->eid))
+ goto cancel;
+
+ if (nla_put_u8(skb, IFA_ADDRESS, ifa->eid))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb,
+ struct netlink_callback *cb, int s_a_idx)
+{
+ struct mctp_ifaddr *ifa;
+ int a_idx = 0;
+
+ list_for_each_entry(ifa, &mdev->addrs, dev_list) {
+ if (a_idx < s_a_idx)
+ goto next;
+
+ if (mctp_fill_addrinfo(skb, cb, mdev, ifa) < 0)
+ break;
+
+next:
+ a_idx++;
+ }
+
+ return 0;
+
+ cb->args[2] = a_idx;
+}
+
+static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct hlist_head *head;
+ int s_h, s_idx, s_a_idx;
+ int h, idx;
+
+ /* todo: change to struct overlay */
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+ s_a_idx = cb->args[2];
+
+ /* todo: strict check -> filter by dev */
+
+ rcu_read_lock();
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ goto cont;
+ if (h > s_h || idx > s_idx)
+ s_a_idx = 0;
+ if (mctp_dump_dev_addrinfo(mdev, skb, cb, s_a_idx) < 0)
+ goto out;
+cont:
+ idx++;
+ }
+ }
+out:
+ rcu_read_unlock();
+ cb->args[1] = idx;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
+static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = {
+ [IFA_ADDRESS] = { .type = NLA_U8 },
+ [IFA_LOCAL] = { .type = NLA_U8 },
+};
+
+static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFA_MAX + 1];
+ struct mctp_ifaddr *ifaddr;
+ struct net_device *dev;
+ struct mctp_addr *addr;
+ struct mctp_dev *mdev;
+ struct ifaddrmsg *ifm;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+ extack);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+
+ if (tb[IFA_LOCAL])
+ addr = nla_data(tb[IFA_LOCAL]);
+ else if (tb[IFA_ADDRESS])
+ addr = nla_data(tb[IFA_ADDRESS]);
+ else
+ return -EINVAL;
+
+ /* find device */
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (!mctp_address_ok(addr->s_addr))
+ return -EINVAL;
+
+ /* Prevent duplicates */
+ list_for_each_entry(ifaddr, &mdev->addrs, dev_list) {
+ if (ifaddr->eid == addr->s_addr)
+ return -EEXIST;
+ }
+
+ ifaddr = kzalloc(sizeof(*ifaddr), GFP_KERNEL);
+ if (!ifaddr)
+ return -ENOMEM;
+
+ ifaddr->eid = addr->s_addr;
+ ifaddr->dev = mdev;
+ list_add(&ifaddr->dev_list, &mdev->addrs);
+
+ return 0;
+}
+
+static struct mctp_dev *mctp_add_dev(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ ASSERT_RTNL();
+
+ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev)
+ return ERR_PTR(ENOMEM);
+
+ INIT_LIST_HEAD(&mdev->addrs);
+
+ mdev->net = MCTP_INITIAL_DEFAULT_NET;
+
+ /* associate to net_device */
+ rcu_assign_pointer(dev->mctp_ptr, mdev);
+ dev_hold(dev);
+ mdev->dev = dev;
+
+ return mdev;
+}
+
+static int mctp_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev, u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODATA;
+ if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+ unsigned int ret;
+
+ /* caller holds RCU */
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ return 0;
+ ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ return ret;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = {
+ [IFLA_MCTP_NET] = { .type = NLA_U32 },
+};
+
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_MCTP_MAX + 1];
+ struct mctp_dev *mdev;
+ int rc;
+
+ rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+ NULL);
+ if (rc)
+ return rc;
+
+ /* caller holds RCU */
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ return 0;
+
+ if (tb[IFLA_MCTP_NET])
+ WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+ return 0;
+}
+
+static void mctp_unregister(struct net_device *dev)
+{
+ struct mctp_ifaddr *ifa, *tmp;
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+
+ if (!mdev)
+ return;
+
+ RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+ /* TODO: drop routes, to the point where we can dev_put(); requires
+ * coordination with the route layer; each route should hold a dev
+ * reference. We should only need to synchronize_rcu() once for this.
+ */
+ list_for_each_entry_safe(ifa, tmp, &mdev->addrs, dev_list) {
+ list_del_rcu(&ifa->dev_list);
+ kfree_rcu(ifa, rcu);
+ }
+
+ mctp_dev_destroy(mdev);
+}
+
+static int mctp_register(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ /* Already registered? */
+ if (rtnl_dereference(dev->mctp_ptr))
+ return 0;
+
+ /* only register specific types; MCTP-specific and loopback for now */
+ if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+ return 0;
+
+ mdev = mctp_add_dev(dev);
+ if (IS_ERR(mdev))
+ return PTR_ERR(mdev);
+
+ return 0;
+}
+
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ int rc;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ rc = mctp_register(dev);
+ if (rc)
+ return notifier_from_errno(rc);
+ break;
+ case NETDEV_UNREGISTER:
+ mctp_unregister(dev);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = {
+ .family = AF_MCTP,
+ .fill_link_af = mctp_fill_link_af,
+ .get_link_af_size = mctp_get_link_af_size,
+ .set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = {
+ .notifier_call = mctp_dev_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+void __init mctp_device_init(void)
+{
+ register_netdevice_notifier(&mctp_dev_nb);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+ NULL, mctp_dump_addrinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+ mctp_rtm_newaddr, NULL, 0);
+ rtnl_af_register(&mctp_af_ops);
+}
+
+void __exit mctp_device_exit(void)
+{
+ rtnl_af_unregister(&mctp_af_ops);
+ rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+ rtnl_unregister(PF_MCTP, RTM_GETADDR);
+ rtnl_unregister(PF_MCTP, RTM_GETLINK);
+
+ unregister_netdevice_notifier(&mctp_dev_nb);
+}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 07/16] mctp: Add initial routing framework
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (5 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 06/16] mctp: Add device handling and netlink interface Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 08/16] mctp: Add netlink route management Jeremy Kerr
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Add a simple routing table, and a couple of route output handlers, and
the mctp packet_type & handler.
Includes changes from Matt Johnston <matt@codeconstruct.com.au>.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
- Controller -> component
---
MAINTAINERS | 1 +
include/net/mctp.h | 75 +++++++++
include/net/net_namespace.h | 4 +
include/net/netns/mctp.h | 16 ++
net/mctp/Makefile | 2 +-
net/mctp/af_mctp.c | 7 +
net/mctp/device.c | 7 +-
net/mctp/route.c | 320 ++++++++++++++++++++++++++++++++++++
8 files changed, 427 insertions(+), 5 deletions(-)
create mode 100644 include/net/netns/mctp.h
create mode 100644 net/mctp/route.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 407b265b026b..9a591fa2384d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10877,6 +10877,7 @@ S: Maintained
F: drivers/net/mctp/
F: include/net/mctp.h
F: include/net/mctpdevice.h
+F: include/net/netns/mctp.h
F: net/mctp/
MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 61452e03aa85..4094bec5e5db 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -11,6 +11,7 @@
#include <linux/bits.h>
#include <linux/mctp.h>
+#include <net/net_namespace.h>
/* MCTP packet definitions */
struct mctp_hdr {
@@ -33,6 +34,8 @@ struct mctp_hdr {
#define MCTP_HDR_TAG_SHIFT 0
#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+#define MCTP_HEADER_MAXLEN 4
+
static inline bool mctp_address_ok(mctp_eid_t eid)
{
return eid >= 8 && eid < 255;
@@ -43,6 +46,78 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
return (struct mctp_hdr *)skb_network_header(skb);
}
+struct mctp_skb_cb {
+ unsigned int magic;
+ unsigned int net;
+ mctp_eid_t src;
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ cb->magic = 0x4d435450;
+ return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ WARN_ON(cb->magic != 0x4d435450);
+ return (void *)(skb->cb);
+}
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and passing the route to
+ * mctp_do_route.
+ */
+struct mctp_route {
+ mctp_eid_t min, max;
+
+ struct mctp_dev *dev;
+ unsigned int mtu;
+ int (*output)(struct mctp_route *route,
+ struct sk_buff *skb);
+
+ struct list_head list;
+ refcount_t refs;
+ struct rcu_head rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr);
+
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+/* routing <--> device interface */
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
void mctp_device_init(void);
void mctp_device_exit(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fa5887143f0d..71fce3086fc3 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
#include <net/netns/can.h>
#include <net/netns/xdp.h>
#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
@@ -166,6 +167,9 @@ struct net {
#ifdef CONFIG_XDP_SOCKETS
struct netns_xdp xdp;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
struct sock *crypto_nlsk;
#endif
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644
index 000000000000..508459b08a59
--- /dev/null
+++ b/include/net/netns/mctp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/types.h>
+
+struct netns_mctp {
+ /* Only updated under RTNL, entries freed via RCU */
+ struct list_head routes;
+};
+
+#endif /* __NETNS_MCTP_H__ */
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
index 2ea98c27b262..b1a330e9d82a 100644
--- a/net/mctp/Makefile
+++ b/net/mctp/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MCTP) += mctp.o
-mctp-objs := af_mctp.o device.o
+mctp-objs := af_mctp.o device.o route.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 401b4fa141a5..8085f5912101 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -157,10 +157,16 @@ static __init int mctp_init(void)
if (rc)
goto err_unreg_sock;
+ rc = mctp_routes_init();
+ if (rc)
+ goto err_unreg_proto;
+
mctp_device_init();
return 0;
+err_unreg_proto:
+ proto_unregister(&mctp_proto);
err_unreg_sock:
sock_unregister(PF_MCTP);
@@ -170,6 +176,7 @@ static __init int mctp_init(void)
static __exit void mctp_exit(void)
{
mctp_device_exit();
+ mctp_routes_exit();
proto_unregister(&mctp_proto);
sock_unregister(PF_MCTP);
}
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 6ad3367d1209..579f747c89c3 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -191,6 +191,8 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifaddr->dev = mdev;
list_add(&ifaddr->dev_list, &mdev->addrs);
+ mctp_route_add_local(mdev, addr->s_addr);
+
return 0;
}
@@ -282,10 +284,7 @@ static void mctp_unregister(struct net_device *dev)
RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
- /* TODO: drop routes, to the point where we can dev_put(); requires
- * coordination with the route layer; each route should hold a dev
- * reference. We should only need to synchronize_rcu() once for this.
- */
+ mctp_route_remove_dev(mdev);
list_for_each_entry_safe(ifa, tmp, &mdev->addrs, dev_list) {
list_del_rcu(&ifa->dev_list);
kfree_rcu(ifa, rcu);
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644
index 000000000000..f0bbc582d6f7
--- /dev/null
+++ b/net/mctp/route.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+
+/* route output callbacks */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+ /* -> to local stack */
+ /* TODO: socket lookup, reassemble */
+ kfree_skb(skb);
+ return 0;
+}
+
+static int __always_unused mctp_route_output(struct mctp_route *route,
+ struct sk_buff *skb)
+{
+ unsigned int mtu;
+ int rc;
+
+ skb->protocol = htons(ETH_P_MCTP);
+
+ mtu = READ_ONCE(skb->dev->mtu);
+ if (skb->len > mtu) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* TODO: daddr (from rt->neigh), saddr (from device?) */
+ rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ NULL, NULL, skb->len);
+ if (rc) {
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+
+ rc = dev_queue_xmit(skb);
+ if (rc)
+ rc = net_xmit_errno(rc);
+
+ return rc;
+}
+
+/* route alloc/release */
+static void mctp_route_release(struct mctp_route *rt)
+{
+ if (refcount_dec_and_test(&rt->refs)) {
+ dev_put(rt->dev->dev);
+ kfree_rcu(rt, rcu);
+ }
+}
+
+/* returns a route with the refcount at 1 */
+static struct mctp_route *mctp_route_alloc(void)
+{
+ struct mctp_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->list);
+ refcount_set(&rt->refs, 1);
+ rt->output = mctp_route_discard;
+
+ return rt;
+}
+
+/* routing lookups */
+static inline bool mctp_rt_match_eid(struct mctp_route *rt,
+ unsigned int net, mctp_eid_t eid)
+{
+ return READ_ONCE(rt->dev->net) == net &&
+ rt->min <= eid && rt->max >= eid;
+}
+
+/* compares match, used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+ struct mctp_route *rt2)
+{
+ ASSERT_RTNL();
+ return rt1->dev->net == rt2->dev->net &&
+ rt1->min == rt2->min &&
+ rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr)
+{
+ struct mctp_route *tmp, *rt = NULL;
+
+ list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+ /* TODO: add metrics */
+ if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+ if (refcount_inc_not_zero(&tmp->refs)) {
+ rt = tmp;
+ break;
+ }
+ }
+ }
+
+ return rt;
+}
+
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+ int rc;
+
+ rc = rt->output(rt, skb);
+ mctp_route_release(rt);
+ return rc;
+}
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *hdr;
+ mctp_eid_t saddr;
+
+ if (WARN_ON(!rt->dev))
+ return -EINVAL;
+
+ if (list_empty(&rt->dev->addrs))
+ return -EHOSTUNREACH;
+
+ /* use the outbound interface's first address as our source */
+ saddr = list_first_entry(&rt->dev->addrs,
+ struct mctp_ifaddr, dev_list)->eid;
+
+ /* TODO: we have the route MTU here; packetise */
+
+ skb_reset_transport_header(skb);
+ skb_push(skb, sizeof(struct mctp_hdr));
+ skb_reset_network_header(skb);
+ hdr = mctp_hdr(skb);
+ hdr->ver = 1;
+ hdr->dest = daddr;
+ hdr->src = saddr;
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
+
+ skb->protocol = htons(ETH_P_MCTP);
+ skb->priority = 0;
+
+ /* cb->net will have been set on initial ingress */
+ cb->src = saddr;
+
+ return mctp_do_route(rt, skb);
+}
+
+/* route management */
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *ert;
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rt->min = addr;
+ rt->max = addr;
+ rt->dev = mdev;
+ dev_hold(rt->dev->dev);
+ rt->output = mctp_route_input;
+
+ ASSERT_RTNL();
+ /* Prevent duplicate identical routes. */
+ list_for_each_entry(ert, &net->mctp.routes, list) {
+ if (mctp_rt_compare_exact(rt, ert)) {
+ mctp_route_release(rt);
+ return -EEXIST;
+ }
+ }
+
+ list_add_rcu(&rt->list, &net->mctp.routes);
+
+ return 0;
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev && rt->min == addr && rt->max == addr) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ }
+ }
+
+ return 0;
+}
+
+/* removes all entries for a given device */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+ ASSERT_RTNL();
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ }
+ }
+}
+
+/* Incoming packet-handling */
+
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct mctp_hdr *mh;
+
+ /* basic non-data sanity checks */
+ if (dev->type != ARPHRD_MCTP)
+ goto err_drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+ goto err_drop;
+
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+
+ /* We have enough for a header; decode and route */
+ mh = mctp_hdr(skb);
+ if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+ goto err_drop;
+
+ cb = __mctp_cb(skb);
+ rcu_read_lock();
+ cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+ rcu_read_unlock();
+
+ rt = mctp_route_lookup(net, cb->net, mh->dest);
+ if (!rt)
+ goto err_drop;
+
+ mctp_do_route(rt, skb);
+
+ return NET_RX_SUCCESS;
+
+err_drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static struct packet_type mctp_packet_type = {
+ .type = cpu_to_be16(ETH_P_MCTP),
+ .func = mctp_pkttype_receive,
+};
+
+/* net namespace implementation */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->routes);
+ return 0;
+}
+
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+ struct mctp_route *rt;
+
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+ mctp_route_release(rt);
+}
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_routes_net_init,
+ .exit = mctp_routes_net_exit,
+};
+
+int __init mctp_routes_init(void)
+{
+ dev_add_pack(&mctp_packet_type);
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_routes_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ dev_remove_pack(&mctp_packet_type);
+}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 08/16] mctp: Add netlink route management
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (6 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 07/16] mctp: Add initial routing framework Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 09/16] mctp: Add neighbour implementation Jeremy Kerr
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
This change adds RTM_GETROUTE, RTM_NEWROUTE & RTM_DELROUTE handlers,
allowing management of the MCTP route table.
Includes changes from Jeremy Kerr <jk@codeconstruct.com.au>.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
include/net/mctp.h | 4 +
net/mctp/route.c | 252 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 247 insertions(+), 9 deletions(-)
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 4094bec5e5db..97cf21869db3 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -111,6 +111,10 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
/* routing <--> device interface */
+int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent, unsigned int mtu, bool is_local);
+int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent);
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
void mctp_route_remove_dev(struct mctp_dev *mdev);
diff --git a/net/mctp/route.c b/net/mctp/route.c
index f0bbc582d6f7..33908e218244 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -20,6 +20,8 @@
#include <net/mctp.h>
#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
@@ -36,8 +38,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
return 0;
}
-static int __always_unused mctp_route_output(struct mctp_route *route,
- struct sk_buff *skb)
+static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
unsigned int mtu;
int rc;
@@ -174,20 +175,28 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
}
/* route management */
-int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent, unsigned int mtu, bool is_local)
{
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *ert;
+ if (!mctp_address_ok(daddr_start))
+ return -EINVAL;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
rt = mctp_route_alloc();
if (!rt)
return -ENOMEM;
- rt->min = addr;
- rt->max = addr;
+ rt->min = daddr_start;
+ rt->max = daddr_start + daddr_extent;
+ rt->mtu = mtu;
rt->dev = mdev;
dev_hold(rt->dev->dev);
- rt->output = mctp_route_input;
+ rt->output = is_local ? mctp_route_input : mctp_route_output;
ASSERT_RTNL();
/* Prevent duplicate identical routes. */
@@ -203,22 +212,43 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
return 0;
}
-int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent)
{
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *tmp;
+ mctp_eid_t daddr_end;
+ bool dropped;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ daddr_end = daddr_start + daddr_extent;
+ dropped = false;
ASSERT_RTNL();
list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
- if (rt->dev == mdev && rt->min == addr && rt->max == addr) {
+ if (rt->dev == mdev &&
+ rt->min == daddr_start && rt->max == daddr_end) {
list_del_rcu(&rt->list);
/* TODO: immediate RTM_DELROUTE */
mctp_route_release(rt);
+ dropped = true;
}
}
- return 0;
+ return dropped ? 0 : -ENOENT;
+}
+
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_add(mdev, addr, 0, 0, true);
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_remove(mdev, addr, 0);
}
/* removes all entries for a given device */
@@ -285,6 +315,199 @@ static struct packet_type mctp_packet_type = {
.func = mctp_pkttype_receive,
};
+/* netlink interface */
+
+static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
+ [RTA_DST] = { .type = NLA_U8 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_OIF] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy rta_metrics_mctp_policy[RTAX_MAX + 1] = {
+ [RTAX_MTU] = { .type = NLA_U32 },
+};
+
+/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
+ * tb must hold RTA_MAX+1 elements.
+ */
+static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb, struct rtmsg **rtm,
+ struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ unsigned int ifindex;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+ rta_mctp_policy, extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[RTA_DST]) {
+ NL_SET_ERR_MSG(extack, "dst EID missing");
+ return -EINVAL;
+ }
+ *daddr_start = nla_get_u8(tb[RTA_DST]);
+
+ if (!tb[RTA_OIF]) {
+ NL_SET_ERR_MSG(extack, "ifindex missing");
+ return -EINVAL;
+ }
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ *rtm = nlmsg_data(nlh);
+ if ((*rtm)->rtm_family != AF_MCTP) {
+ NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "bad ifindex");
+ return -ENODEV;
+ }
+ *mdev = mctp_dev_get_rtnl(dev);
+ if (!*mdev)
+ return -ENODEV;
+
+ if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "no routes to loopback");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ unsigned int mtu;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ /* TODO: parse mtu from nlparse */
+ mtu = 0;
+
+ rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, false);
+ return rc;
+}
+
+static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+ return rc;
+}
+
+static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
+ u32 portid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ struct rtmsg *hdr;
+ void *metrics;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->rtm_family = AF_MCTP;
+
+ /* we use the _len fields as a number of EIDs, rather than
+ * a number of bits in the address
+ */
+ hdr->rtm_dst_len = rt->max - rt->min;
+ hdr->rtm_src_len = 0;
+ hdr->rtm_tos = 0;
+ hdr->rtm_table = RT_TABLE_DEFAULT;
+ hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
+ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
+ hdr->rtm_type = RTN_ANYCAST; /* TODO: type from route */
+
+ if (nla_put_u8(skb, RTA_DST, rt->min))
+ goto cancel;
+
+ metrics = nla_nest_start_noflag(skb, RTA_METRICS);
+ if (!metrics)
+ goto cancel;
+
+ if (rt->mtu) {
+ if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
+ goto cancel;
+ }
+
+ nla_nest_end(skb, metrics);
+
+ if (rt->dev) {
+ if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
+ goto cancel;
+ }
+
+ /* TODO: conditional neighbour physaddr? */
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mctp_route *rt;
+ int s_idx, idx;
+
+ /* TODO: allow filtering on route data, possibly under
+ * cb->strict_check
+ */
+
+ /* TODO: change to struct overlay */
+ s_idx = cb->args[0];
+ idx = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (idx++ < s_idx)
+ continue;
+ if (mctp_fill_rtinfo(skb, rt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
@@ -310,11 +533,22 @@ static struct pernet_operations mctp_net_ops = {
int __init mctp_routes_init(void)
{
dev_add_pack(&mctp_packet_type);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+ NULL, mctp_dump_rtinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+ mctp_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+ mctp_delroute, NULL, 0);
+
return register_pernet_subsys(&mctp_net_ops);
}
void __exit mctp_routes_exit(void)
{
unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+ rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+ rtnl_unregister(PF_MCTP, RTM_GETROUTE);
dev_remove_pack(&mctp_packet_type);
}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 09/16] mctp: Add neighbour implementation
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (7 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 08/16] mctp: Add netlink route management Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 10/16] mctp: Add neighbour netlink interface Jeremy Kerr
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
Add an initial neighbour table implementation, to be used in the route
output path.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
v2:
- controller -> component
---
include/net/mctp.h | 25 +++++++
include/net/mctpdevice.h | 1 +
include/net/netns/mctp.h | 4 ++
net/mctp/Makefile | 2 +-
net/mctp/af_mctp.c | 5 ++
net/mctp/device.c | 1 +
net/mctp/neigh.c | 141 +++++++++++++++++++++++++++++++++++++++
7 files changed, 178 insertions(+), 1 deletion(-)
create mode 100644 net/mctp/neigh.c
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 97cf21869db3..8a7353a0aa2e 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -119,6 +119,31 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
void mctp_route_remove_dev(struct mctp_dev *mdev);
+/* neighbour definitions */
+enum mctp_neigh_source {
+ MCTP_NEIGH_STATIC,
+ MCTP_NEIGH_DISCOVER,
+};
+
+struct mctp_neigh {
+ struct mctp_dev *dev;
+ mctp_eid_t eid;
+ enum mctp_neigh_source source;
+
+ unsigned char ha[MAX_ADDR_LEN];
+
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+int mctp_neigh_init(void);
+void mctp_neigh_exit(void);
+
+// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN
+int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid,
+ void *ret_hwaddr);
+void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+
int mctp_routes_init(void);
void mctp_routes_exit(void);
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
index 1f5a30d1e99b..e8282772f475 100644
--- a/include/net/mctpdevice.h
+++ b/include/net/mctpdevice.h
@@ -37,5 +37,6 @@ struct mctp_dev {
struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
#endif /* __NET_MCTPDEVICE_H */
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
index 508459b08a59..2f5ebeeb320e 100644
--- a/include/net/netns/mctp.h
+++ b/include/net/netns/mctp.h
@@ -11,6 +11,10 @@
struct netns_mctp {
/* Only updated under RTNL, entries freed via RCU */
struct list_head routes;
+
+ /* neighbour table */
+ struct mutex neigh_lock;
+ struct list_head neighbours;
};
#endif /* __NETNS_MCTP_H__ */
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
index b1a330e9d82a..0171333384d7 100644
--- a/net/mctp/Makefile
+++ b/net/mctp/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MCTP) += mctp.o
-mctp-objs := af_mctp.o device.o route.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 8085f5912101..58701e6b282c 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -161,6 +161,10 @@ static __init int mctp_init(void)
if (rc)
goto err_unreg_proto;
+ rc = mctp_neigh_init();
+ if (rc)
+ goto err_unreg_proto;
+
mctp_device_init();
return 0;
@@ -176,6 +180,7 @@ static __init int mctp_init(void)
static __exit void mctp_exit(void)
{
mctp_device_exit();
+ mctp_neigh_exit();
mctp_routes_exit();
proto_unregister(&mctp_proto);
sock_unregister(PF_MCTP);
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 579f747c89c3..280679798a2e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -285,6 +285,7 @@ static void mctp_unregister(struct net_device *dev)
RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
mctp_route_remove_dev(mdev);
+ mctp_neigh_remove_dev(mdev);
list_for_each_entry_safe(ifa, tmp, &mdev->addrs, dev_list) {
list_del_rcu(&ifa->dev_list);
kfree_rcu(ifa, rcu);
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644
index 000000000000..8603f0c45a8f
--- /dev/null
+++ b/net/mctp/neigh.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static int __always_unused mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+ enum mctp_neigh_source source,
+ size_t lladdr_len, const void *lladdr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ if (mctp_neigh_lookup(mdev, eid, NULL) == 0) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ if (lladdr_len > sizeof(neigh->ha)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL);
+ if (!neigh) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&neigh->list);
+ neigh->dev = mdev;
+ dev_hold(neigh->dev->dev);
+ neigh->eid = eid;
+ neigh->source = source;
+ memcpy(neigh->ha, lladdr, lladdr_len);
+
+ list_add_rcu(&neigh->list, &net->mctp.neighbours);
+ rc = 0;
+out:
+ mutex_unlock(&net->mctp.neigh_lock);
+ return rc;
+}
+
+static void __mctp_neigh_free(struct rcu_head *rcu)
+{
+ struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
+
+ dev_put(neigh->dev->dev);
+ kfree(neigh);
+}
+
+/* Removes all neighbour entries referring to a device */
+void mctp_neigh_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+}
+
+int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc = -EHOSTUNREACH; // TODO: or ENOENT?
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (mdev == neigh->dev && eid == neigh->eid) {
+ if (ret_hwaddr)
+ memcpy(ret_hwaddr, neigh->ha,
+ sizeof(neigh->ha));
+ rc = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return rc;
+}
+
+/* namespace registration */
+static int __net_init mctp_neigh_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->neighbours);
+ return 0;
+}
+
+static void __net_exit mctp_neigh_net_exit(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+ struct mctp_neigh *neigh;
+
+ list_for_each_entry(neigh, &ns->neighbours, list)
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+}
+
+/* net namespace implementation */
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_neigh_net_init,
+ .exit = mctp_neigh_net_exit,
+};
+
+int __init mctp_neigh_init(void)
+{
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_neigh_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 10/16] mctp: Add neighbour netlink interface
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (8 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 09/16] mctp: Add neighbour implementation Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 11/16] mctp: Populate socket implementation Jeremy Kerr
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
This change adds the netlink interfaces for manipulating the MCTP
neighbour table.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
net/mctp/neigh.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 204 insertions(+), 3 deletions(-)
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
index 8603f0c45a8f..90ed2f02d1fb 100644
--- a/net/mctp/neigh.c
+++ b/net/mctp/neigh.c
@@ -21,9 +21,9 @@
#include <net/netlink.h>
#include <net/sock.h>
-static int __always_unused mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
- enum mctp_neigh_source source,
- size_t lladdr_len, const void *lladdr)
+static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+ enum mctp_neigh_source source,
+ size_t lladdr_len, const void *lladdr)
{
struct net *net = dev_net(mdev->dev);
struct mctp_neigh *neigh;
@@ -85,6 +85,196 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev)
mutex_unlock(&net->mctp.neigh_lock);
}
+// TODO: add a "source" flag so netlink can only delete static neighbours?
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+ bool dropped = false;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev && neigh->eid == eid) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ dropped = true;
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+ return dropped ? 0 : -ENOENT;
+}
+
+static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = {
+ [NDA_DST] = { .type = NLA_U8 },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+};
+
+static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX + 1];
+ int rc;
+ mctp_eid_t eid;
+ void *lladdr;
+ int lladdr_len;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "lladdr too large?");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+
+ if (!tb[NDA_LLADDR]) {
+ NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified");
+ return -EINVAL;
+ }
+
+ eid = nla_get_u8(tb[NDA_DST]);
+ if (!mctp_address_ok(eid)) {
+ NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
+ return -EINVAL;
+ }
+
+ lladdr = nla_data(tb[NDA_LLADDR]);
+ lladdr_len = nla_len(tb[NDA_LLADDR]);
+
+ ndm = nlmsg_data(nlh);
+
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (lladdr_len != dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Wrong lladdr length");
+ return -EINVAL;
+ }
+
+ return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC,
+ lladdr_len, lladdr);
+}
+
+static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ int rc;
+ mctp_eid_t eid;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+ eid = nla_get_u8(tb[NDA_DST]);
+
+ ndm = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ return mctp_neigh_remove(mdev, eid);
+}
+
+static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ unsigned int flags, struct mctp_neigh *neigh)
+{
+ struct net_device *dev = neigh->dev->dev;
+ struct nlmsghdr *nlh;
+ struct ndmsg *hdr;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ndm_family = AF_MCTP;
+ hdr->ndm_ifindex = dev->ifindex;
+ hdr->ndm_state = 0; // TODO other state bits?
+ if (neigh->source == MCTP_NEIGH_STATIC)
+ hdr->ndm_state |= NUD_PERMANENT;
+ hdr->ndm_flags = 0;
+ hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL?
+
+ if (nla_put_u8(skb, NDA_DST, neigh->eid))
+ goto cancel;
+
+ if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int rc, idx, req_ifindex;
+ struct mctp_neigh *neigh;
+ struct ndmsg *ndmsg;
+ struct {
+ int idx;
+ } *cbctx = (void *)cb->ctx;
+
+ ndmsg = nlmsg_data(cb->nlh);
+ req_ifindex = ndmsg->ndm_ifindex;
+
+ idx = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (idx < cbctx->idx)
+ goto cont;
+
+ rc = 0;
+ if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex)
+ rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, NLM_F_MULTI, neigh);
+
+ if (rc)
+ break;
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ cbctx->idx = idx;
+ return skb->len;
+}
+
int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
{
struct net *net = dev_net(mdev->dev);
@@ -111,6 +301,7 @@ static int __net_init mctp_neigh_net_init(struct net *net)
struct netns_mctp *ns = &net->mctp;
INIT_LIST_HEAD(&ns->neighbours);
+ mutex_init(&ns->neigh_lock);
return 0;
}
@@ -132,10 +323,20 @@ static struct pernet_operations mctp_net_ops = {
int __init mctp_neigh_init(void)
{
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+ mctp_rtm_newneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+ mctp_rtm_delneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+ NULL, mctp_rtm_getneigh, 0);
+
return register_pernet_subsys(&mctp_net_ops);
}
void __exit mctp_neigh_exit(void)
{
unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 11/16] mctp: Populate socket implementation
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (9 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 10/16] mctp: Add neighbour netlink interface Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 12/16] mctp: Add dest neighbour lladdr to route output Jeremy Kerr
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
Start filling-out the socket syscalls: bind, sendmsg & recvmsg.
This requires an input route implementation, so we add to
mctp_route_input, allowing lookups on binds & message tags. This just
handles single-packet messages at present, we will add fragmentation in
a future change.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
v2:
- require CAP_NET_BIND_SERVICE for bind(), CAP_NET_RAW for TX.
- be strict about sendmsg() tag bits
---
include/net/mctp.h | 59 ++++++++++
include/net/netns/mctp.h | 13 +++
net/mctp/af_mctp.c | 203 ++++++++++++++++++++++++++++++++--
net/mctp/route.c | 227 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 492 insertions(+), 10 deletions(-)
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 8a7353a0aa2e..38b787e26179 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/mctp.h>
#include <net/net_namespace.h>
+#include <net/sock.h>
/* MCTP packet definitions */
struct mctp_hdr {
@@ -46,6 +47,64 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
return (struct mctp_hdr *)skb_network_header(skb);
}
+/* socket implementation */
+struct mctp_sock {
+ struct sock sk;
+
+ /* bind() params */
+ int bind_net;
+ mctp_eid_t bind_addr;
+ __u8 bind_type;
+
+ /* list of mctp_sk_key, for incoming tag lookup. updates protected
+ * by sk->net->keys_lock
+ */
+ struct hlist_head __rcu keys;
+};
+
+/* Key for matching incoming packets to sockets or reassembly contexts.
+ * Packets are matched on (src,dest,tag).
+ *
+ * Lifetime requirements:
+ *
+ * - keys are free()ed via RCU
+ *
+ * - a mctp_sk_key contains a reference to a struct sock; this is valid
+ * for the life of the key. On sock destruction (through unhash), the key is
+ * removed from lists (see below), and will not be observable after a RCU
+ * grace period.
+ *
+ * any RX occuring within that grace period may still queue to the socket,
+ * but will hit the SOCK_DEAD case before the socket is freed.
+ *
+ * - these mctp_sk_keys appear on two lists:
+ * 1) the struct mctp_sock->keys list
+ * 2) the struct netns_mctp->keys list
+ *
+ * updates to either list are performed under the netns_mctp->keys
+ * lock.
+ *
+ * - there is a single destruction path for a mctp_sk_key - through socket
+ * unhash (see mctp_sk_unhash). This performs performs the list removal under
+ * keys_lock.
+ */
+struct mctp_sk_key {
+ mctp_eid_t peer_addr;
+ mctp_eid_t local_addr;
+ __u8 tag; /* incoming tag match; invert TO for local */
+
+ /* we hold a ref to sk when set */
+ struct sock *sk;
+
+ /* routing lookup list */
+ struct hlist_node hlist;
+
+ /* per-socket list */
+ struct hlist_node sklist;
+
+ struct rcu_head rcu;
+};
+
struct mctp_skb_cb {
unsigned int magic;
unsigned int net;
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
index 2f5ebeeb320e..14ae6d37e52a 100644
--- a/include/net/netns/mctp.h
+++ b/include/net/netns/mctp.h
@@ -12,6 +12,19 @@ struct netns_mctp {
/* Only updated under RTNL, entries freed via RCU */
struct list_head routes;
+ /* Bound sockets: list of sockets bound by type.
+ * This list is updated from non-atomic contexts (under bind_lock),
+ * and read (under rcu) in packet rx
+ */
+ struct mutex bind_lock;
+ struct hlist_head binds;
+
+ /* tag allocations. This list is read and updated from atomic contexts,
+ * but elements are free()ed after a RCU grace-period
+ */
+ spinlock_t keys_lock;
+ struct hlist_head keys;
+
/* neighbour table */
struct mutex neigh_lock;
struct list_head neighbours;
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 58701e6b282c..52bd7f2b78db 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -18,10 +18,6 @@
/* socket implementation */
-struct mctp_sock {
- struct sock sk;
-};
-
static int mctp_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -36,18 +32,160 @@ static int mctp_release(struct socket *sock)
static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return 0;
+ struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct sockaddr_mctp *smctp;
+ int rc;
+
+ if (addrlen < sizeof(*smctp))
+ return -EINVAL;
+
+ if (addr->sa_family != AF_MCTP)
+ return -EAFNOSUPPORT;
+
+ if (!capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ /* it's a valid sockaddr for MCTP, cast and do protocol checks */
+ smctp = (struct sockaddr_mctp *)addr;
+
+ lock_sock(sk);
+
+ /* TODO: allow rebind */
+ if (sk_hashed(sk)) {
+ rc = -EADDRINUSE;
+ goto out_release;
+ }
+ msk->bind_net = smctp->smctp_network;
+ msk->bind_addr = smctp->smctp_addr.s_addr;
+ msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+ rc = sk->sk_prot->hash(sk);
+
+out_release:
+ release_sock(sk);
+
+ return rc;
}
static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
- return 0;
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+ int rc, addrlen = msg->msg_namelen;
+ struct sock *sk = sock->sk;
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct sk_buff *skb;
+
+ if (addr) {
+ if (addrlen < sizeof(struct sockaddr_mctp))
+ return -EINVAL;
+ if (addr->smctp_family != AF_MCTP)
+ return -EINVAL;
+ if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ return -EINVAL;
+
+ } else {
+ /* TODO: connect()ed sockets */
+ return -EDESTADDRREQ;
+ }
+
+ if (!capable(CAP_NET_RAW))
+ return -EACCES;
+
+ rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr);
+ if (!rt)
+ return -EHOSTUNREACH;
+
+ skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+ msg->msg_flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ skb_reserve(skb, hlen);
+
+ /* set type as fist byte in payload */
+ *(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+ rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+ if (rc < 0) {
+ kfree_skb(skb);
+ return rc;
+ }
+
+ /* set up cb */
+ cb = __mctp_cb(skb);
+ cb->net = addr->smctp_network;
+
+ rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+ addr->smctp_tag);
+
+ return rc ? : len;
}
static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- return 0;
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ size_t msglen;
+ u8 type;
+ int rc;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EOPNOTSUPP;
+
+ skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ if (!skb->len) {
+ rc = 0;
+ goto out_free;
+ }
+
+ /* extract message type, remove from data */
+ type = *((u8 *)skb->data);
+ msglen = skb->len - 1;
+
+ if (len < msglen)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ len = msglen;
+
+ rc = skb_copy_datagram_msg(skb, 1, msg, len);
+ if (rc < 0)
+ goto out_free;
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (addr) {
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ /* TODO: expand mctp_skb_cb for header fields? */
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+
+ hdr = mctp_hdr(skb);
+ addr = msg->msg_name;
+ addr->smctp_family = AF_MCTP;
+ addr->smctp_network = cb->net;
+ addr->smctp_addr.s_addr = hdr->src;
+ addr->smctp_type = type;
+ addr->smctp_tag = hdr->flags_seq_tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+ msg->msg_namelen = sizeof(*addr);
+ }
+
+ rc = len;
+
+ if (flags & MSG_TRUNC)
+ rc = msglen;
+
+out_free:
+ skb_free_datagram(sk, skb);
+ return rc;
}
static int mctp_setsockopt(struct socket *sock, int level, int optname,
@@ -83,16 +221,63 @@ static const struct proto_ops mctp_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static int mctp_sk_init(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ INIT_HLIST_HEAD(&msk->keys);
+ return 0;
+}
+
static void mctp_sk_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
}
+static int mctp_sk_hash(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ mutex_lock(&net->mctp.bind_lock);
+ sk_add_node_rcu(sk, &net->mctp.binds);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ return 0;
+}
+
+static void mctp_sk_unhash(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct net *net = sock_net(sk);
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ unsigned long flags;
+
+ /* remove from any type-based binds */
+ mutex_lock(&net->mctp.bind_lock);
+ sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ /* remove tag allocations */
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ hlist_del_rcu(&key->sklist);
+ hlist_del_rcu(&key->hlist);
+ kfree_rcu(key, rcu);
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ synchronize_rcu();
+}
+
static struct proto mctp_proto = {
.name = "MCTP",
.owner = THIS_MODULE,
.obj_size = sizeof(struct mctp_sock),
+ .init = mctp_sk_init,
.close = mctp_sk_close,
+ .hash = mctp_sk_hash,
+ .unhash = mctp_sk_unhash,
};
static int mctp_pf_create(struct net *net, struct socket *sock,
@@ -147,6 +332,10 @@ static __init int mctp_init(void)
{
int rc;
+ /* ensure our uapi tag definitions match the header format */
+ BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
+ BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
+
pr_info("mctp: management component transport protocol core\n");
rc = sock_register(&mctp_pf);
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 33908e218244..1909437f9a15 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -30,10 +30,139 @@ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
return 0;
}
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *mh;
+ struct sock *sk;
+ u8 type;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ /* TODO: look up in skb->cb? */
+ mh = mctp_hdr(skb);
+
+ if (!skb_headlen(skb))
+ return NULL;
+
+ type = (*(u8 *)skb->data) & 0x7f;
+
+ sk_for_each_rcu(sk, &net->mctp.binds) {
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
+ continue;
+
+ if (msk->bind_type != type)
+ continue;
+
+ if (msk->bind_addr != MCTP_ADDR_ANY &&
+ msk->bind_addr != mh->dest)
+ continue;
+
+ return msk;
+ }
+
+ return NULL;
+}
+
+static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
+ mctp_eid_t peer, u8 tag)
+{
+ if (key->local_addr != local)
+ return false;
+
+ if (key->peer_addr != peer)
+ return false;
+
+ if (key->tag != tag)
+ return false;
+
+ return true;
+}
+
+static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
+ mctp_eid_t peer)
+{
+ struct mctp_sk_key *key, *ret;
+ struct mctp_hdr *mh;
+ u8 tag;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ mh = mctp_hdr(skb);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ ret = NULL;
+
+ hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
+ if (mctp_key_match(key, mh->dest, peer, tag)) {
+ ret = key;
+ break;
+ }
+ }
+
+ return ret;
+}
+
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
- /* -> to local stack */
- /* TODO: socket lookup, reassemble */
+ struct net *net = dev_net(skb->dev);
+ struct mctp_sk_key *key;
+ struct mctp_sock *msk;
+ struct mctp_hdr *mh;
+
+ msk = NULL;
+
+ /* we may be receiving a locally-routed packet; drop source sk
+ * accounting
+ */
+ skb_orphan(skb);
+
+ /* ensure we have enough data for a header and a type */
+ if (skb->len < sizeof(struct mctp_hdr) + 1)
+ goto drop;
+
+ /* grab header, advance data ptr */
+ mh = mctp_hdr(skb);
+ skb_pull(skb, sizeof(struct mctp_hdr));
+
+ if (mh->ver != 1)
+ goto drop;
+
+ /* TODO: reassembly */
+ if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
+ != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
+ goto drop;
+
+ rcu_read_lock();
+ /* 1. lookup socket matching (src,dest,tag) */
+ key = mctp_lookup_key(net, skb, mh->src);
+
+ /* 2. lookup socket macthing (BCAST,dest,tag) */
+ if (!key)
+ key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+
+ /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create
+ * mapping for future (1)/(2).
+ */
+ if (key)
+ msk = container_of(key->sk, struct mctp_sock, sk);
+ else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM))
+ msk = mctp_lookup_bind(net, skb);
+
+ if (!msk)
+ goto unlock_drop;
+
+ sock_queue_rcv_skb(&msk->sk, skb);
+
+ rcu_read_unlock();
+
+ return 0;
+
+unlock_drop:
+ rcu_read_unlock();
+drop:
kfree_skb(skb);
return 0;
}
@@ -91,6 +220,80 @@ static struct mctp_route *mctp_route_alloc(void)
return rt;
}
+/* tag management */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+ struct mctp_sock *msk)
+{
+ struct netns_mctp *mns = &net->mctp;
+
+ lockdep_assert_held(&mns->keys_lock);
+
+ key->sk = &msk->sk;
+
+ /* we hold the net->key_lock here, allowing updates to both
+ * then net and sk
+ */
+ hlist_add_head(&key->hlist, &mns->keys);
+ hlist_add_head(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+ int rc = -EAGAIN;
+ u8 tagbits;
+
+ /* be optimistic, alloc now */
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+ key->local_addr = saddr;
+ key->peer_addr = daddr;
+
+ /* 8 possible tag values */
+ tagbits = 0xff;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ /* Walk through the existing keys, looking for potential conflicting
+ * tags. If we find a conflict, clear that bit from tagbits
+ */
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ /* if we don't own the tag, it can't conflict */
+ if (tmp->tag & MCTP_HDR_FLAG_TO)
+ continue;
+
+ if ((tmp->peer_addr == daddr ||
+ tmp->peer_addr == MCTP_ADDR_ANY) &&
+ tmp->local_addr == saddr)
+ tagbits &= ~(1 << tmp->tag);
+
+ if (!tagbits)
+ break;
+ }
+
+ if (tagbits) {
+ key->tag = __ffs(tagbits);
+ mctp_reserve_tag(net, key, msk);
+ *tagp = key->tag;
+ rc = 0;
+ }
+
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!tagbits)
+ kfree(key);
+
+ return rc;
+}
+
/* routing lookups */
static inline bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
@@ -140,9 +343,12 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *hdr;
mctp_eid_t saddr;
+ int rc;
+ u8 tag;
if (WARN_ON(!rt->dev))
return -EINVAL;
@@ -154,6 +360,15 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
saddr = list_first_entry(&rt->dev->addrs,
struct mctp_ifaddr, dev_list)->eid;
+ if (req_tag & MCTP_HDR_FLAG_TO) {
+ rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (rc)
+ return rc;
+ tag |= MCTP_HDR_FLAG_TO;
+ } else {
+ tag = req_tag;
+ }
+
/* TODO: we have the route MTU here; packetise */
skb_reset_transport_header(skb);
@@ -163,8 +378,10 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
hdr->ver = 1;
hdr->dest = daddr;
hdr->src = saddr;
- hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */
+ tag;
+ skb->dev = rt->dev->dev;
skb->protocol = htons(ETH_P_MCTP);
skb->priority = 0;
@@ -514,6 +731,10 @@ static int __net_init mctp_routes_net_init(struct net *net)
struct netns_mctp *ns = &net->mctp;
INIT_LIST_HEAD(&ns->routes);
+ INIT_HLIST_HEAD(&ns->binds);
+ mutex_init(&ns->bind_lock);
+ INIT_HLIST_HEAD(&ns->keys);
+ spin_lock_init(&ns->keys_lock);
return 0;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 12/16] mctp: Add dest neighbour lladdr to route output
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (10 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 11/16] mctp: Populate socket implementation Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 13/16] mctp: Implement message fragmentation & reassembly Jeremy Kerr
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
Now that we have a neighbour implementation, hook it up to the output
path to set the dest hardware address for outgoing packets.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
net/mctp/route.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 1909437f9a15..cf06d88ca0ca 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -169,6 +169,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ char daddr_buf[MAX_ADDR_LEN];
+ char *daddr = NULL;
unsigned int mtu;
int rc;
@@ -180,9 +183,12 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
return -EMSGSIZE;
}
- /* TODO: daddr (from rt->neigh), saddr (from device?) */
+ /* If lookup fails let the device handle daddr==NULL */
+ if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+ daddr = daddr_buf;
+
rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
- NULL, NULL, skb->len);
+ daddr, skb->dev->dev_addr, skb->len);
if (rc) {
kfree_skb(skb);
return -EHOSTUNREACH;
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 13/16] mctp: Implement message fragmentation & reassembly
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (11 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 12/16] mctp: Add dest neighbour lladdr to route output Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 14/16] mctp: Allow per-netns default networks Jeremy Kerr
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
This change implements MCTP fragmentation (based on route & device MTU),
and corresponding reassembly.
The MCTP specification only allows for fragmentation on the originating
message endpoint, and reassembly on the destination endpoint -
intermediate nodes do not need to reassemble/refragment. Consequently,
we only fragment in the local transmit path, and reassemble
locally-bound packets. Messages are required to be in-order, so we
simply cancel reassembly on out-of-order or missing packets.
In the fragmentation path, we just break up the message into MTU-sized
fragments; the skb structure is a simple copy for now, which we can later
improve with a shared data implementation.
For reassembly, we keep track of incoming message fragments using the
existing tag infrastructure, allocating a key on the (src,dest,tag)
tuple, and reassembles matching fragments into a skb->frag_list.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
v2:
- limit max reassembly size
---
include/net/mctp.h | 25 ++-
net/mctp/af_mctp.c | 8 +
net/mctp/route.c | 370 ++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 359 insertions(+), 44 deletions(-)
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 38b787e26179..34d980bd5949 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -84,9 +84,21 @@ struct mctp_sock {
* updates to either list are performed under the netns_mctp->keys
* lock.
*
- * - there is a single destruction path for a mctp_sk_key - through socket
- * unhash (see mctp_sk_unhash). This performs performs the list removal under
- * keys_lock.
+ * - a key may have a sk_buff attached as part of an in-progress message
+ * reassembly (->reasm_head). The reassembly context is protected by
+ * reasm_lock, which may be acquired with the keys lock (above) held, if
+ * necessary. Consequently, keys lock *cannot* be acquired with the
+ * reasm_lock held.
+ *
+ * - there are two destruction paths for a mctp_sk_key:
+ *
+ * - through socket unhash (see mctp_sk_unhash). This performs performs the
+ * list removal under keys_lock.
+ *
+ * - where a key is established to receive a reply message: after receiving
+ * the (complete) reply, or during reassembly errors. Here, we clean up
+ * the reassembly context (marking reasm_dead, to prevent another from
+ * starting), and remove the socket from the netns & socket lists.
*/
struct mctp_sk_key {
mctp_eid_t peer_addr;
@@ -102,6 +114,13 @@ struct mctp_sk_key {
/* per-socket list */
struct hlist_node sklist;
+ /* incoming fragment reassembly context */
+ spinlock_t reasm_lock;
+ struct sk_buff *reasm_head;
+ struct sk_buff **reasm_tailp;
+ bool reasm_dead;
+ u8 last_seq;
+
struct rcu_head rcu;
};
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 52bd7f2b78db..9ca836df19d0 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -263,6 +263,14 @@ static void mctp_sk_unhash(struct sock *sk)
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
hlist_del_rcu(&key->sklist);
hlist_del_rcu(&key->hlist);
+
+ spin_lock(&key->reasm_lock);
+ if (key->reasm_head)
+ kfree_skb(key->reasm_head);
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock(&key->reasm_lock);
+
kfree_rcu(key, rcu);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
diff --git a/net/mctp/route.c b/net/mctp/route.c
index cf06d88ca0ca..77e37ac03b05 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -23,6 +23,8 @@
#include <net/netlink.h>
#include <net/sock.h>
+static const unsigned int mctp_message_maxlen = 64 * 1024;
+
/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
@@ -105,14 +107,124 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
return ret;
}
+static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ mctp_eid_t local, mctp_eid_t peer,
+ u8 tag, gfp_t gfp)
+{
+ struct mctp_sk_key *key;
+
+ key = kzalloc(sizeof(*key), gfp);
+ if (!key)
+ return NULL;
+
+ key->peer_addr = peer;
+ key->local_addr = local;
+ key->tag = tag;
+ key->sk = &msk->sk;
+ spin_lock_init(&key->reasm_lock);
+
+ return key;
+}
+
+static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *tmp;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+ if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+ key->tag)) {
+ rc = -EEXIST;
+ break;
+ }
+ }
+
+ if (!rc) {
+ hlist_add_head(&key->hlist, &net->mctp.keys);
+ hlist_add_head(&key->sklist, &msk->keys);
+ }
+
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+/* Must be called with key->reasm_lock, which it will release. Will schedule
+ * the key for an RCU free. */
+static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags)
+ __releases(&key->reasm_lock)
+{
+ struct sk_buff *skb;
+
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock_irqrestore(&key->reasm_lock, flags);
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del_rcu(&key->hlist);
+ hlist_del_rcu(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ kfree_rcu(key, rcu);
+
+ if (skb)
+ kfree_skb(skb);
+}
+
+static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ u8 exp_seq, this_seq;
+
+ this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
+ & MCTP_HDR_SEQ_MASK;
+
+ if (!key->reasm_head) {
+ key->reasm_head = skb;
+ key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ key->last_seq = this_seq;
+ return 0;
+ }
+
+ exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
+
+ if (this_seq != exp_seq)
+ return -EINVAL;
+
+ if (key->reasm_head->len + skb->len > mctp_message_maxlen)
+ return -EINVAL;
+
+ skb->next = NULL;
+ skb->sk = NULL;
+ *key->reasm_tailp = skb;
+ key->reasm_tailp = &skb->next;
+
+ key->last_seq = this_seq;
+
+ key->reasm_head->data_len += skb->len;
+ key->reasm_head->len += skb->len;
+ key->reasm_head->truesize += skb->truesize;
+
+ return 0;
+}
+
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
struct mctp_sk_key *key;
struct mctp_sock *msk;
struct mctp_hdr *mh;
+ unsigned long f;
+ u8 tag, flags;
+ int rc;
msk = NULL;
+ rc = -EINVAL;
/* we may be receiving a locally-routed packet; drop source sk
* accounting
@@ -121,50 +233,143 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* ensure we have enough data for a header and a type */
if (skb->len < sizeof(struct mctp_hdr) + 1)
- goto drop;
+ goto out;
/* grab header, advance data ptr */
mh = mctp_hdr(skb);
skb_pull(skb, sizeof(struct mctp_hdr));
if (mh->ver != 1)
- goto drop;
+ goto out;
- /* TODO: reassembly */
- if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
- != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
- goto drop;
+ flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
rcu_read_lock();
- /* 1. lookup socket matching (src,dest,tag) */
+
+ /* lookup socket / reasm context, exactly matching (src,dest,tag) */
key = mctp_lookup_key(net, skb, mh->src);
- /* 2. lookup socket macthing (BCAST,dest,tag) */
- if (!key)
- key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+ if (flags & MCTP_HDR_FLAG_SOM) {
+
+ if (key) {
+ msk = container_of(key->sk, struct mctp_sock, sk);
+ } else {
+ /* first response to a broadcast? do a more general
+ * key lookup to find the socket, but don't use this
+ * key for reassembly - we'll create a more specific
+ * one for future packets if required (ie, !EOM).
+ */
+ key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+ if (key) {
+ msk = container_of(key->sk,
+ struct mctp_sock, sk);
+ key = NULL;
+ }
+ }
- /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create
- * mapping for future (1)/(2).
- */
- if (key)
- msk = container_of(key->sk, struct mctp_sock, sk);
- else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM))
- msk = mctp_lookup_bind(net, skb);
+ if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
+ msk = mctp_lookup_bind(net, skb);
- if (!msk)
- goto unlock_drop;
+ if (!msk) {
+ rc = -ENOENT;
+ goto out;
+ }
- sock_queue_rcv_skb(&msk->sk, skb);
+ /* single-packet message? deliver to socket, clean up any
+ * pending key.
+ */
+ if (flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(&msk->sk, skb);
+ if (key) {
+ spin_lock_irqsave(&key->reasm_lock, f);
+ /* we've hit a pending reassembly; not much we
+ * can do but drop it */
+ __mctp_key_unlock_drop(key, net, f);
+ }
+ rc = 0;
+ goto out;
+ }
- rcu_read_unlock();
+ /* broadcast response or a bind() - create a key for further
+ * packets for this message
+ */
+ if (!key) {
+ key = mctp_key_alloc(msk, mh->dest, mh->src,
+ tag, GFP_ATOMIC);
+ if (!key) {
+ rc = -ENOMEM;
+ goto out;
+ }
- return 0;
+ /* we can queue without the reasm lock here, as the
+ * key isn't observable yet
+ */
+ mctp_frag_queue(key, skb);
+
+ /* if the key_add fails, we've raced with another
+ * SOM packet with the same src, dest and tag. There's
+ * no way to distinguish future packets, so all we
+ * can do is drop; we'll free the skb on exit from
+ * this function.
+ */
+ rc = mctp_key_add(key, msk);
+ if (rc)
+ kfree(key);
+
+ } else {
+ /* existing key: start reassembly */
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ if (key->reasm_head || key->reasm_dead) {
+ /* duplicate start? drop everything */
+ __mctp_key_unlock_drop(key, net, f);
+ rc = -EEXIST;
+ } else {
+ rc = mctp_frag_queue(key, skb);
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+ }
+
+ } else if (key) {
+ /* this packet continues a previous message; reassemble
+ * using the message-specific key
+ */
+
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ /* we need to be continuing an existing reassembly... */
+ if (!key->reasm_head)
+ rc = -EINVAL;
+ else
+ rc = mctp_frag_queue(key, skb);
+
+ /* end of message? deliver to socket, and we're done with
+ * the reassembly/response key
+ */
+ if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(key->sk, key->reasm_head);
+ key->reasm_head = NULL;
+ __mctp_key_unlock_drop(key, net, f);
+ } else {
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+
+ } else {
+ /* not a start, no matching key */
+ rc = -ENOENT;
+ }
-unlock_drop:
+out:
rcu_read_unlock();
-drop:
- kfree_skb(skb);
- return 0;
+ if (rc)
+ kfree_skb(skb);
+ return rc;
+}
+
+static unsigned int mctp_route_mtu(struct mctp_route *rt)
+{
+ return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
@@ -234,8 +439,6 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
lockdep_assert_held(&mns->keys_lock);
- key->sk = &msk->sk;
-
/* we hold the net->key_lock here, allowing updates to both
* then net and sk
*/
@@ -257,11 +460,9 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk,
u8 tagbits;
/* be optimistic, alloc now */
- key = kzalloc(sizeof(*key), GFP_KERNEL);
+ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
if (!key)
return -ENOMEM;
- key->local_addr = saddr;
- key->peer_addr = daddr;
/* 8 possible tag values */
tagbits = 0xff;
@@ -346,12 +547,93 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
return rc;
}
+static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+ unsigned int mtu, u8 tag)
+{
+ const unsigned int hlen = sizeof(struct mctp_hdr);
+ struct mctp_hdr *hdr, *hdr2;
+ unsigned int pos, size;
+ struct sk_buff *skb2;
+ int rc;
+ u8 seq;
+
+ hdr = mctp_hdr(skb);
+ seq = 0;
+ rc = 0;
+
+ if (mtu < hlen + 1) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* we've got the header */
+ skb_pull(skb, hlen);
+
+ for (pos = 0; pos < skb->len;) {
+ /* size of message payload */
+ size = min(mtu - hlen, skb->len - pos);
+
+ skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+ if (!skb2) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ /* generic skb copy */
+ skb2->protocol = skb->protocol;
+ skb2->priority = skb->priority;
+ skb2->dev = skb->dev;
+ memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
+
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+
+ /* establish packet */
+ skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+ skb_reset_network_header(skb2);
+ skb_put(skb2, hlen + size);
+ skb2->transport_header = skb2->network_header + hlen;
+
+ /* copy header fields, calculate SOM/EOM flags & seq */
+ hdr2 = mctp_hdr(skb2);
+ hdr2->ver = hdr->ver;
+ hdr2->dest = hdr->dest;
+ hdr2->src = hdr->src;
+ hdr2->flags_seq_tag = tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ if (pos == 0)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+
+ if (pos + size == skb->len)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+
+ hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
+
+ /* copy message payload */
+ skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+
+ /* do route, but don't drop the rt reference */
+ rc = rt->output(rt, skb2);
+ if (rc)
+ break;
+
+ seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
+ pos += size;
+ }
+
+ mctp_route_release(rt);
+ consume_skb(skb);
+ return rc;
+}
+
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *hdr;
+ unsigned int mtu;
mctp_eid_t saddr;
int rc;
u8 tag;
@@ -375,26 +657,32 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
tag = req_tag;
}
- /* TODO: we have the route MTU here; packetise */
+ skb->protocol = htons(ETH_P_MCTP);
+ skb->priority = 0;
skb_reset_transport_header(skb);
skb_push(skb, sizeof(struct mctp_hdr));
skb_reset_network_header(skb);
+ skb->dev = rt->dev->dev;
+
+ /* cb->net will have been set on initial ingress */
+ cb->src = saddr;
+
+ /* set up common header fields */
hdr = mctp_hdr(skb);
hdr->ver = 1;
hdr->dest = daddr;
hdr->src = saddr;
- hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */
- tag;
- skb->dev = rt->dev->dev;
- skb->protocol = htons(ETH_P_MCTP);
- skb->priority = 0;
+ mtu = mctp_route_mtu(rt);
- /* cb->net will have been set on initial ingress */
- cb->src = saddr;
-
- return mctp_do_route(rt, skb);
+ if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+ tag;
+ return mctp_do_route(rt, skb);
+ } else {
+ return mctp_do_fragment_route(rt, skb, mtu, tag);
+ }
}
/* route management */
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 14/16] mctp: Allow per-netns default networks
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (12 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 13/16] mctp: Implement message fragmentation & reassembly Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 15/16] mctp: Allow MCTP on tun devices Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 16/16] mctp: Add MCTP overview document Jeremy Kerr
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
Currently we have a compile-time default network
(MCTP_INITIAL_DEFAULT_NET). This change introduces a default_net field
on the net namespace, allowing future configuration for new interfaces.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
include/net/mctp.h | 4 ++++
include/net/netns/mctp.h | 3 +++
include/uapi/linux/mctp.h | 1 -
net/mctp/af_mctp.c | 3 +++
net/mctp/device.c | 2 +-
net/mctp/route.c | 14 ++++++++++++++
6 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 34d980bd5949..17705210e4f9 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -37,6 +37,8 @@ struct mctp_hdr {
#define MCTP_HEADER_MAXLEN 4
+#define MCTP_INITIAL_DEFAULT_NET 1
+
static inline bool mctp_address_ok(mctp_eid_t eid)
{
return eid >= 8 && eid < 255;
@@ -189,6 +191,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
/* routing <--> device interface */
+unsigned int mctp_default_net(struct net *net);
+int mctp_default_net_set(struct net *net, unsigned int index);
int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
unsigned int daddr_extent, unsigned int mtu, bool is_local);
int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
index 14ae6d37e52a..acedef12a35e 100644
--- a/include/net/netns/mctp.h
+++ b/include/net/netns/mctp.h
@@ -25,6 +25,9 @@ struct netns_mctp {
spinlock_t keys_lock;
struct hlist_head keys;
+ /* MCTP network */
+ unsigned int default_net;
+
/* neighbour table */
struct mutex neigh_lock;
struct list_head neighbours;
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 7be546e7abe7..5c6a2e0c4e59 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -26,7 +26,6 @@ struct sockaddr_mctp {
};
#define MCTP_NET_ANY 0x0
-#define MCTP_NET_DEFAULT 0x0
#define MCTP_ADDR_NULL 0x00
#define MCTP_ADDR_ANY 0xff
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 9ca836df19d0..84f722d31fd7 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -94,6 +94,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (!capable(CAP_NET_RAW))
return -EACCES;
+ if (addr->smctp_network == MCTP_NET_ANY)
+ addr->smctp_network = mctp_default_net(sock_net(sk));
+
rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
addr->smctp_addr.s_addr);
if (!rt)
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 280679798a2e..4c2f83fc4abc 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -208,7 +208,7 @@ static struct mctp_dev *mctp_add_dev(struct net_device *dev)
INIT_LIST_HEAD(&mdev->addrs);
- mdev->net = MCTP_INITIAL_DEFAULT_NET;
+ mdev->net = mctp_default_net(dev_net(dev));
/* associate to net_device */
rcu_assign_pointer(dev->mctp_ptr, mdev);
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 77e37ac03b05..b9dd6d5bbf33 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -431,6 +431,19 @@ static struct mctp_route *mctp_route_alloc(void)
return rt;
}
+unsigned int mctp_default_net(struct net *net)
+{
+ return READ_ONCE(net->mctp.default_net);
+}
+
+int mctp_default_net_set(struct net *net, unsigned int index)
+{
+ if (index == 0)
+ return -EINVAL;
+ WRITE_ONCE(net->mctp.default_net, index);
+ return 0;
+}
+
/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
struct mctp_sock *msk)
@@ -1029,6 +1042,7 @@ static int __net_init mctp_routes_net_init(struct net *net)
mutex_init(&ns->bind_lock);
INIT_HLIST_HEAD(&ns->keys);
spin_lock_init(&ns->keys_lock);
+ WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
return 0;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 15/16] mctp: Allow MCTP on tun devices
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (13 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 14/16] mctp: Allow per-netns default networks Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
2021-07-13 6:20 ` [PATCH RFC net-next v2 16/16] mctp: Add MCTP overview document Jeremy Kerr
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
From: Matt Johnston <matt@codeconstruct.com.au>
Allowing TUN is useful for testing, to route packets to userspace or to
tunnel between machines.
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
---
net/mctp/device.c | 7 +++++--
net/mctp/route.c | 13 ++++++++-----
2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 4c2f83fc4abc..06f9fc633598 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -302,9 +302,12 @@ static int mctp_register(struct net_device *dev)
if (rtnl_dereference(dev->mctp_ptr))
return 0;
- /* only register specific types; MCTP-specific and loopback for now */
- if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+ /* only register specific types (inc. NONE for TUN devices) */
+ if (!(dev->type == ARPHRD_MCTP ||
+ dev->type == ARPHRD_LOOPBACK ||
+ dev->type == ARPHRD_NONE)) {
return 0;
+ }
mdev = mctp_add_dev(dev);
if (IS_ERR(mdev))
diff --git a/net/mctp/route.c b/net/mctp/route.c
index b9dd6d5bbf33..5ea236f2afee 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -797,13 +797,18 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
struct net_device *orig_dev)
{
struct net *net = dev_net(dev);
+ struct mctp_dev *mdev;
struct mctp_skb_cb *cb;
struct mctp_route *rt;
struct mctp_hdr *mh;
- /* basic non-data sanity checks */
- if (dev->type != ARPHRD_MCTP)
+ rcu_read_lock();
+ mdev = __mctp_dev_get(dev);
+ rcu_read_unlock();
+ if (!mdev) {
+ /* basic non-data sanity checks */
goto err_drop;
+ }
if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
goto err_drop;
@@ -817,9 +822,7 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
goto err_drop;
cb = __mctp_cb(skb);
- rcu_read_lock();
- cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
- rcu_read_unlock();
+ cb->net = READ_ONCE(mdev->net);
rt = mctp_route_lookup(net, cb->net, mh->dest);
if (!rt)
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH RFC net-next v2 16/16] mctp: Add MCTP overview document
2021-07-13 6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
` (14 preceding siblings ...)
2021-07-13 6:20 ` [PATCH RFC net-next v2 15/16] mctp: Allow MCTP on tun devices Jeremy Kerr
@ 2021-07-13 6:20 ` Jeremy Kerr
15 siblings, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2021-07-13 6:20 UTC (permalink / raw)
To: netdev; +Cc: Matt Johnston, Andrew Jeffery
This change adds a brief document about the sockets API provided for
sending and receiving MCTP messages from userspace.
This is roughly based on the OpenBMC design document, at:
https://github.com/openbmc/docs/blob/master/designs/mctp/mctp-kernel.md
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
---
v2:
- Controller -> component
---
Documentation/networking/index.rst | 1 +
Documentation/networking/mctp.rst | 213 +++++++++++++++++++++++++++++
MAINTAINERS | 1 +
3 files changed, 215 insertions(+)
create mode 100644 Documentation/networking/mctp.rst
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e9ce55992aa9..eea3a79f4ea0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -68,6 +68,7 @@ Contents:
l2tp
lapb-module
mac80211-injection
+ mctp
mpls-sysctl
mptcp-sysctl
multiqueue
diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst
new file mode 100644
index 000000000000..6100cdc220f6
--- /dev/null
+++ b/Documentation/networking/mctp.rst
@@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+Management Component Transport Protocol (MCTP)
+==============================================
+
+net/mctp/ contains protocol support for MCTP, as defined by DMTF standard
+DSP0236. Physical interface drivers ("bindings" in the specification) are
+provided in drivers/net/mctp/.
+
+The core code provides a socket-based interface to send and receive MCTP
+messages, through an AF_MCTP, SOCK_DGRAM socket.
+
+Structure: interfaces & networks
+================================
+
+The kernel models the local MCTP topology through two items: interfaces and
+networks.
+
+An interface (or "link") is an instance of an MCTP physical transport binding
+(as defined by DSP0236, section 3.2.47), likely connected to a specific hardware
+device. This is represented as a ``struct netdevice``.
+
+A network defines a unique address space for MCTP endpoints by endpoint-ID
+(described by DSP0236, section 3.2.31). A network has a user-visible identifier
+to allow references from userspace. Route definitions are specific to one
+network.
+
+Interfaces are associated with one network. A network may be associated with one
+or more interfaces.
+
+If multiple networks are present, each may contain endpoint IDs (EIDs) that are
+also present on other networks.
+
+Sockets API
+===========
+
+Protocol definitions
+--------------------
+
+MCTP uses ``AF_MCTP`` / ``PF_MCTP`` for the address- and protocol- families.
+Since MCTP is message-based, only ``SOCK_DGRAM`` sockets are supported.
+
+.. code-block:: C
+
+ int sd = socket(AF_MCTP, SOCK_DGRAM, 0);
+
+The only (current) value for the ``protocol`` argument is 0.
+
+As with all socket address families, source and destination addresses are
+specified with a ``sockaddr`` type, with a single-byte endpoint address:
+
+.. code-block:: C
+
+ typedef __u8 mctp_eid_t;
+
+ struct mctp_addr {
+ mctp_eid_t s_addr;
+ };
+
+ struct sockaddr_mctp {
+ unsigned short int smctp_family;
+ int smctp_network;
+ struct mctp_addr smctp_addr;
+ __u8 smctp_type;
+ __u8 smctp_tag;
+ };
+
+ #define MCTP_NET_ANY 0x0
+ #define MCTP_ADDR_ANY 0xff
+
+
+Syscall behaviour
+-----------------
+
+The following sections describe the MCTP-specific behaviours of the standard
+socket system calls. These behaviours have been chosen to map closely to the
+existing sockets APIs.
+
+``bind()`` : set local socket address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sockets that receive incoming request packets will bind to a local address,
+using the ``bind()`` syscall.
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = MCTP_NET_ANY;
+ addr.smctp_addr.s_addr = MCTP_ADDR_ANY;
+ addr.smctp_type = MCTP_TYPE_PLDM;
+ addr.smctp_tag = MCTP_TAG_OWNER;
+
+ int rc = bind(sd, (struct sockaddr *)&addr, sizeof(addr));
+
+This establishes the local address of the socket. Incoming MCTP messages that
+match the network, address, and message type will be received by this socket.
+The reference to 'incoming' is important here; a bound socket will only receive
+messages with the TO bit set, to indicate an incoming request message, rather
+than a response.
+
+The ``smctp_tag`` value will configure the tags accepted from the remote side of
+this socket. Given the above, the only valid value is ``MCTP_TAG_OWNER``, which
+will result in remotely "owned" tags being routed to this socket. Since
+``MCTP_TAG_OWNER`` is set, the 3 least-significant bits of ``smctp_tag`` are not
+used; callers must set them to zero.
+
+A ``smctp_network`` value of ``MCTP_NET_ANY`` will configure the socket to
+receive incoming packets from any locally-connected network. A specific network
+value will cause the socket to only receive incoming messages from that network.
+
+The ``smctp_addr`` field specifies a local address to bind to. A value of
+``MCTP_ADDR_ANY`` configures the socket to receive messages addressed to any
+local destination EID.
+
+The ``smctp_type`` field specifies which message types to receive. Only the
+lower 7 bits of the type is matched on incoming messages (ie., the
+most-significant IC bit is not part of the match). This results in the socket
+receiving packets with and without a message integrity check footer.
+
+``sendto()``, ``sendmsg()``, ``send()`` : transmit an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message is transmitted using one of the ``sendto()``, ``sendmsg()`` or
+``send()`` syscalls. Using ``sendto()`` as the primary example:
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+ char buf[14];
+ ssize_t len;
+
+ /* set message destination */
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = 0;
+ addr.smctp_addr.s_addr = 8;
+ addr.smctp_tag = MCTP_TAG_OWNER;
+ addr.smctp_type = MCTP_TYPE_ECHO;
+
+ /* arbitrary message to send, with message-type header */
+ buf[0] = MCTP_TYPE_ECHO;
+ memcpy(buf + 1, "hello, world!", sizeof(buf) - 1);
+
+ len = sendto(sd, buf, sizeof(buf), 0,
+ (struct sockaddr_mctp *)&addr, sizeof(addr));
+
+The network and address fields of ``addr`` define the remote address to send to.
+If ``smctp_tag`` has the ``MCTP_TAG_OWNER``, the kernel will ignore any bits set
+in ``MCTP_TAG_VALUE``, and generate a tag value suitable for the destination
+EID. If ``MCTP_TAG_OWNER`` is not set, the message will be sent with the tag
+value as specified. If a tag value cannot be allocated, the system call will
+report an errno of ``EAGAIN``.
+
+The application must provide the message type byte as the first byte of the
+message buffer passed to ``sendto()``. If a message integrity check is to be
+included in the transmitted message, it must also be provided in the message
+buffer, and the most-significant bit of the message type byte must be 1.
+
+The ``sendmsg()`` system call allows a more compact argument interface, and the
+message buffer to be specified as a scatter-gather list. At present no ancillary
+message types (used for the ``msg_control`` data passed to ``sendmsg()``) are
+defined.
+
+Transmitting a message on an unconnected socket with ``MCTP_TAG_OWNER``
+specified will cause an allocation of a tag, if no valid tag is already
+allocated for that destination. The (destination-eid,tag) tuple acts as an
+implicit local socket address, to allow the socket to receive responses to this
+outgoing message. If any previous allocation has been performed (to for a
+different remote EID), that allocation is lost.
+
+Sockets will only receive responses to requests they have sent (with TO=1) and
+may only respond (with TO=0) to requests they have received.
+
+``recvfrom()``, ``recvmsg()``, ``recv()`` : receive an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message can be received by an application using one of the
+``recvfrom()``, ``recvmsg()``, or ``recv()`` system calls. Using ``recvfrom()``
+as the primary example:
+
+.. code-block:: C
+
+ struct sockaddr_mctp addr;
+ socklen_t addrlen;
+ char buf[14];
+ ssize_t len;
+
+ addrlen = sizeof(addr);
+
+ len = recvfrom(sd, buf, sizeof(buf), 0,
+ (struct sockaddr_mctp *)&addr, &addrlen);
+
+ /* We can expect addr to describe an MCTP address */
+ assert(addrlen >= sizeof(buf));
+ assert(addr.smctp_family == AF_MCTP);
+
+ printf("received %zd bytes from remote EID %d\n", rc, addr.smctp_addr);
+
+The address argument to ``recvfrom`` and ``recvmsg`` is populated with the
+remote address of the incoming message, including tag value (this will be needed
+in order to reply to the message).
+
+The first byte of the message buffer will contain the message type byte. If an
+integrity check follows the message, it will be included in the received buffer.
+
+The ``recv()`` system call behaves in a similar way, but does not provide a
+remote address to the application. Therefore, these are only useful if the
+remote address is already known, or the message does not require a reply.
+
+Like the send calls, sockets will only receive responses to requests they have
+sent (TO=1) and may only respond (TO=0) to requests they have received.
diff --git a/MAINTAINERS b/MAINTAINERS
index 9a591fa2384d..c73c6dd598a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10874,6 +10874,7 @@ M: Jeremy Kerr <jk@codeconstruct.com.au>
M: Matt Johnston <matt@codeconstruct.com.au>
L: netdev@vger.kernel.org
S: Maintained
+F: Documentation/networking/mctp.rst
F: drivers/net/mctp/
F: include/net/mctp.h
F: include/net/mctpdevice.h
--
2.30.2
^ permalink raw reply related [flat|nested] 17+ messages in thread