All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeremy Kerr <jk@codeconstruct.com.au>
To: netdev@vger.kernel.org
Cc: Matt Johnston <matt@codeconstruct.com.au>,
	Andrew Jeffery <andrew@aj.id.au>
Subject: [PATCH RFC net-next v2 07/16] mctp: Add initial routing framework
Date: Tue, 13 Jul 2021 14:20:14 +0800	[thread overview]
Message-ID: <20210713062023.3286895-8-jk@codeconstruct.com.au> (raw)
In-Reply-To: <20210713062023.3286895-1-jk@codeconstruct.com.au>

Add a simple routing table, and a couple of route output handlers, and
the mctp packet_type & handler.

Includes changes from Matt Johnston <matt@codeconstruct.com.au>.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>

---
v2:
 - Controller -> component
---
 MAINTAINERS                 |   1 +
 include/net/mctp.h          |  75 +++++++++
 include/net/net_namespace.h |   4 +
 include/net/netns/mctp.h    |  16 ++
 net/mctp/Makefile           |   2 +-
 net/mctp/af_mctp.c          |   7 +
 net/mctp/device.c           |   7 +-
 net/mctp/route.c            | 320 ++++++++++++++++++++++++++++++++++++
 8 files changed, 427 insertions(+), 5 deletions(-)
 create mode 100644 include/net/netns/mctp.h
 create mode 100644 net/mctp/route.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 407b265b026b..9a591fa2384d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10877,6 +10877,7 @@ S:	Maintained
 F:	drivers/net/mctp/
 F:	include/net/mctp.h
 F:	include/net/mctpdevice.h
+F:	include/net/netns/mctp.h
 F:	net/mctp/
 
 MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 61452e03aa85..4094bec5e5db 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -11,6 +11,7 @@
 
 #include <linux/bits.h>
 #include <linux/mctp.h>
+#include <net/net_namespace.h>
 
 /* MCTP packet definitions */
 struct mctp_hdr {
@@ -33,6 +34,8 @@ struct mctp_hdr {
 #define MCTP_HDR_TAG_SHIFT	0
 #define MCTP_HDR_TAG_MASK	GENMASK(2, 0)
 
+#define MCTP_HEADER_MAXLEN	4
+
 static inline bool mctp_address_ok(mctp_eid_t eid)
 {
 	return eid >= 8 && eid < 255;
@@ -43,6 +46,78 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
 	return (struct mctp_hdr *)skb_network_header(skb);
 }
 
+struct mctp_skb_cb {
+	unsigned int	magic;
+	unsigned int	net;
+	mctp_eid_t	src;
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+	struct mctp_skb_cb *cb = (void *)skb->cb;
+
+	cb->magic = 0x4d435450;
+	return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+	struct mctp_skb_cb *cb = (void *)skb->cb;
+
+	WARN_ON(cb->magic != 0x4d435450);
+	return (void *)(skb->cb);
+}
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and passing the route to
+ * mctp_do_route.
+ */
+struct mctp_route {
+	mctp_eid_t		min, max;
+
+	struct mctp_dev		*dev;
+	unsigned int		mtu;
+	int			(*output)(struct mctp_route *route,
+					  struct sk_buff *skb);
+
+	struct list_head	list;
+	refcount_t		refs;
+	struct rcu_head		rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+				     mctp_eid_t daddr);
+
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+/* routing <--> device interface */
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
 void mctp_device_init(void);
 void mctp_device_exit(void);
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fa5887143f0d..71fce3086fc3 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
 #include <net/netns/can.h>
 #include <net/netns/xdp.h>
 #include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -166,6 +167,9 @@ struct net {
 #ifdef CONFIG_XDP_SOCKETS
 	struct netns_xdp	xdp;
 #endif
+#if IS_ENABLED(CONFIG_MCTP)
+	struct netns_mctp	mctp;
+#endif
 #if IS_ENABLED(CONFIG_CRYPTO_USER)
 	struct sock		*crypto_nlsk;
 #endif
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644
index 000000000000..508459b08a59
--- /dev/null
+++ b/include/net/netns/mctp.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/types.h>
+
+struct netns_mctp {
+	/* Only updated under RTNL, entries freed via RCU */
+	struct list_head routes;
+};
+
+#endif /* __NETNS_MCTP_H__ */
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
index 2ea98c27b262..b1a330e9d82a 100644
--- a/net/mctp/Makefile
+++ b/net/mctp/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MCTP) += mctp.o
-mctp-objs := af_mctp.o device.o
+mctp-objs := af_mctp.o device.o route.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 401b4fa141a5..8085f5912101 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -157,10 +157,16 @@ static __init int mctp_init(void)
 	if (rc)
 		goto err_unreg_sock;
 
+	rc = mctp_routes_init();
+	if (rc)
+		goto err_unreg_proto;
+
 	mctp_device_init();
 
 	return 0;
 
+err_unreg_proto:
+	proto_unregister(&mctp_proto);
 err_unreg_sock:
 	sock_unregister(PF_MCTP);
 
@@ -170,6 +176,7 @@ static __init int mctp_init(void)
 static __exit void mctp_exit(void)
 {
 	mctp_device_exit();
+	mctp_routes_exit();
 	proto_unregister(&mctp_proto);
 	sock_unregister(PF_MCTP);
 }
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 6ad3367d1209..579f747c89c3 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -191,6 +191,8 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 	ifaddr->dev = mdev;
 	list_add(&ifaddr->dev_list, &mdev->addrs);
 
+	mctp_route_add_local(mdev, addr->s_addr);
+
 	return 0;
 }
 
@@ -282,10 +284,7 @@ static void mctp_unregister(struct net_device *dev)
 
 	RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
 
-	/* TODO: drop routes, to the point where we can dev_put(); requires
-	 * coordination with the route layer; each route should hold a dev
-	 * reference. We should only need to synchronize_rcu() once for this.
-	 */
+	mctp_route_remove_dev(mdev);
 	list_for_each_entry_safe(ifa, tmp, &mdev->addrs, dev_list) {
 		list_del_rcu(&ifa->dev_list);
 		kfree_rcu(ifa, rcu);
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644
index 000000000000..f0bbc582d6f7
--- /dev/null
+++ b/net/mctp/route.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+
+/* route output callbacks */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+	/* -> to local stack */
+	/* TODO: socket lookup, reassemble */
+	kfree_skb(skb);
+	return 0;
+}
+
+static int __always_unused mctp_route_output(struct mctp_route *route,
+					     struct sk_buff *skb)
+{
+	unsigned int mtu;
+	int rc;
+
+	skb->protocol = htons(ETH_P_MCTP);
+
+	mtu = READ_ONCE(skb->dev->mtu);
+	if (skb->len > mtu) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
+	/* TODO: daddr (from rt->neigh), saddr (from device?)  */
+	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+			     NULL, NULL, skb->len);
+	if (rc) {
+		kfree_skb(skb);
+		return -EHOSTUNREACH;
+	}
+
+	rc = dev_queue_xmit(skb);
+	if (rc)
+		rc = net_xmit_errno(rc);
+
+	return rc;
+}
+
+/* route alloc/release */
+static void mctp_route_release(struct mctp_route *rt)
+{
+	if (refcount_dec_and_test(&rt->refs)) {
+		dev_put(rt->dev->dev);
+		kfree_rcu(rt, rcu);
+	}
+}
+
+/* returns a route with the refcount at 1 */
+static struct mctp_route *mctp_route_alloc(void)
+{
+	struct mctp_route *rt;
+
+	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+	if (!rt)
+		return NULL;
+
+	INIT_LIST_HEAD(&rt->list);
+	refcount_set(&rt->refs, 1);
+	rt->output = mctp_route_discard;
+
+	return rt;
+}
+
+/* routing lookups */
+static inline bool mctp_rt_match_eid(struct mctp_route *rt,
+				     unsigned int net, mctp_eid_t eid)
+{
+	return READ_ONCE(rt->dev->net) == net &&
+		rt->min <= eid && rt->max >= eid;
+}
+
+/* compares match, used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+				  struct mctp_route *rt2)
+{
+	ASSERT_RTNL();
+	return rt1->dev->net == rt2->dev->net &&
+		rt1->min == rt2->min &&
+		rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+				     mctp_eid_t daddr)
+{
+	struct mctp_route *tmp, *rt = NULL;
+
+	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+		/* TODO: add metrics */
+		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+			if (refcount_inc_not_zero(&tmp->refs)) {
+				rt = tmp;
+				break;
+			}
+		}
+	}
+
+	return rt;
+}
+
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+	int rc;
+
+	rc = rt->output(rt, skb);
+	mctp_route_release(rt);
+	return rc;
+}
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+	struct mctp_skb_cb *cb = mctp_cb(skb);
+	struct mctp_hdr *hdr;
+	mctp_eid_t saddr;
+
+	if (WARN_ON(!rt->dev))
+		return -EINVAL;
+
+	if (list_empty(&rt->dev->addrs))
+		return -EHOSTUNREACH;
+
+	/* use the outbound interface's first address as our source */
+	saddr = list_first_entry(&rt->dev->addrs,
+				 struct mctp_ifaddr, dev_list)->eid;
+
+	/* TODO: we have the route MTU here; packetise */
+
+	skb_reset_transport_header(skb);
+	skb_push(skb, sizeof(struct mctp_hdr));
+	skb_reset_network_header(skb);
+	hdr = mctp_hdr(skb);
+	hdr->ver = 1;
+	hdr->dest = daddr;
+	hdr->src = saddr;
+	hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
+
+	skb->protocol = htons(ETH_P_MCTP);
+	skb->priority = 0;
+
+	/* cb->net will have been set on initial ingress */
+	cb->src = saddr;
+
+	return mctp_do_route(rt, skb);
+}
+
+/* route management */
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+	struct net *net = dev_net(mdev->dev);
+	struct mctp_route *rt, *ert;
+
+	rt = mctp_route_alloc();
+	if (!rt)
+		return -ENOMEM;
+
+	rt->min = addr;
+	rt->max = addr;
+	rt->dev = mdev;
+	dev_hold(rt->dev->dev);
+	rt->output = mctp_route_input;
+
+	ASSERT_RTNL();
+	/* Prevent duplicate identical routes. */
+	list_for_each_entry(ert, &net->mctp.routes, list) {
+		if (mctp_rt_compare_exact(rt, ert)) {
+			mctp_route_release(rt);
+			return -EEXIST;
+		}
+	}
+
+	list_add_rcu(&rt->list, &net->mctp.routes);
+
+	return 0;
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+	struct net *net = dev_net(mdev->dev);
+	struct mctp_route *rt, *tmp;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+		if (rt->dev == mdev && rt->min == addr && rt->max == addr) {
+			list_del_rcu(&rt->list);
+			/* TODO: immediate RTM_DELROUTE */
+			mctp_route_release(rt);
+		}
+	}
+
+	return 0;
+}
+
+/* removes all entries for a given device */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+	struct net *net = dev_net(mdev->dev);
+	struct mctp_route *rt, *tmp;
+	ASSERT_RTNL();
+	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+		if (rt->dev == mdev) {
+			list_del_rcu(&rt->list);
+			/* TODO: immediate RTM_DELROUTE */
+			mctp_route_release(rt);
+		}
+	}
+}
+
+/* Incoming packet-handling */
+
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+				struct packet_type *pt,
+				struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+	struct mctp_skb_cb *cb;
+	struct mctp_route *rt;
+	struct mctp_hdr *mh;
+
+	/* basic non-data sanity checks */
+	if (dev->type != ARPHRD_MCTP)
+		goto err_drop;
+
+	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+		goto err_drop;
+
+	skb_reset_transport_header(skb);
+	skb_reset_network_header(skb);
+
+	/* We have enough for a header; decode and route */
+	mh = mctp_hdr(skb);
+	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+		goto err_drop;
+
+	cb = __mctp_cb(skb);
+	rcu_read_lock();
+	cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+	rcu_read_unlock();
+
+	rt = mctp_route_lookup(net, cb->net, mh->dest);
+	if (!rt)
+		goto err_drop;
+
+	mctp_do_route(rt, skb);
+
+	return NET_RX_SUCCESS;
+
+err_drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static struct packet_type mctp_packet_type = {
+	.type = cpu_to_be16(ETH_P_MCTP),
+	.func = mctp_pkttype_receive,
+};
+
+/* net namespace implementation */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+	struct netns_mctp *ns = &net->mctp;
+
+	INIT_LIST_HEAD(&ns->routes);
+	return 0;
+}
+
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+	struct mctp_route *rt;
+
+	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+		mctp_route_release(rt);
+}
+
+static struct pernet_operations mctp_net_ops = {
+	.init = mctp_routes_net_init,
+	.exit = mctp_routes_net_exit,
+};
+
+int __init mctp_routes_init(void)
+{
+	dev_add_pack(&mctp_packet_type);
+	return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_routes_exit(void)
+{
+	unregister_pernet_subsys(&mctp_net_ops);
+	dev_remove_pack(&mctp_packet_type);
+}
-- 
2.30.2


  parent reply	other threads:[~2021-07-13  6:27 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-13  6:20 [PATCH RFC net-next v2 00/16] Add Management Component Transport Protocol support Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 01/16] mctp: Add MCTP base Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 02/16] mctp: Add base socket/protocol definitions Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 03/16] mctp: Add base packet definitions Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 04/16] mctp: Add sockaddr_mctp to uapi Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 05/16] mctp: Add initial driver infrastructure Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 06/16] mctp: Add device handling and netlink interface Jeremy Kerr
2021-07-13  6:20 ` Jeremy Kerr [this message]
2021-07-13  6:20 ` [PATCH RFC net-next v2 08/16] mctp: Add netlink route management Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 09/16] mctp: Add neighbour implementation Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 10/16] mctp: Add neighbour netlink interface Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 11/16] mctp: Populate socket implementation Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 12/16] mctp: Add dest neighbour lladdr to route output Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 13/16] mctp: Implement message fragmentation & reassembly Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 14/16] mctp: Allow per-netns default networks Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 15/16] mctp: Allow MCTP on tun devices Jeremy Kerr
2021-07-13  6:20 ` [PATCH RFC net-next v2 16/16] mctp: Add MCTP overview document Jeremy Kerr

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210713062023.3286895-8-jk@codeconstruct.com.au \
    --to=jk@codeconstruct.com.au \
    --cc=andrew@aj.id.au \
    --cc=matt@codeconstruct.com.au \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.