All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, anthony.l.nguyen@intel.com,
	Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 01/10] udp_tunnel: add the ability to share port tables
Date: Fri, 25 Sep 2020 17:56:40 -0700	[thread overview]
Message-ID: <20200926005649.3285089-2-kuba@kernel.org> (raw)
In-Reply-To: <20200926005649.3285089-1-kuba@kernel.org>

Unfortunately recent Intel NIC designs share the UDP port table
across netdevs. So far the UDP tunnel port state was maintained
per netdev, we need to extend that to cater to Intel NICs.

Expect NICs to allocate the info structure dynamically and link
to the state from there. All the shared NICs will record port
offload information in the one instance of the table so we need
to make sure that the use count can accommodate larger numbers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/udp_tunnel.h  | 24 ++++++++++
 net/ipv4/udp_tunnel_nic.c | 96 +++++++++++++++++++++++++++++++++++----
 2 files changed, 110 insertions(+), 10 deletions(-)

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 94bb7a882250..2ea453dac876 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags {
 	UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN	= BIT(3),
 };
 
+struct udp_tunnel_nic;
+
+#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES	(U16_MAX / 2)
+
+struct udp_tunnel_nic_shared {
+	struct udp_tunnel_nic *udp_tunnel_nic_info;
+
+	struct list_head devices;
+};
+
+struct udp_tunnel_nic_shared_node {
+	struct net_device *dev;
+	struct list_head list;
+};
+
 /**
  * struct udp_tunnel_nic_info - driver UDP tunnel offload information
  * @set_port:	callback for adding a new port
  * @unset_port:	callback for removing a port
  * @sync_table:	callback for syncing the entire port table at once
+ * @shared:	reference to device global state (optional)
  * @flags:	device flags from enum udp_tunnel_nic_info_flags
  * @tables:	UDP port tables this device has
  * @tables.n_entries:		number of entries in this table
@@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags {
  * Drivers are expected to provide either @set_port and @unset_port callbacks
  * or the @sync_table callback. Callbacks are invoked with rtnl lock held.
  *
+ * Devices which (misguidedly) share the UDP tunnel port table across multiple
+ * netdevs should allocate an instance of struct udp_tunnel_nic_shared and
+ * point @shared at it.
+ * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices
+ * sharing a table.
+ *
  * Known limitations:
  *  - UDP tunnel port notifications are fundamentally best-effort -
  *    it is likely the driver will both see skbs which use a UDP tunnel port,
@@ -234,6 +256,8 @@ struct udp_tunnel_nic_info {
 	/* all at once */
 	int (*sync_table)(struct net_device *dev, unsigned int table);
 
+	struct udp_tunnel_nic_shared *shared;
+
 	unsigned int flags;
 
 	struct udp_tunnel_nic_table_info {
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 69962165c0e8..0d122edc368d 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -19,8 +19,9 @@ enum udp_tunnel_nic_table_entry_flags {
 struct udp_tunnel_nic_table_entry {
 	__be16 port;
 	u8 type;
-	u8 use_cnt;
 	u8 flags;
+	u16 use_cnt;
+#define UDP_TUNNEL_NIC_USE_CNT_MAX	U16_MAX
 	u8 hw_priv;
 };
 
@@ -370,6 +371,8 @@ udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
 	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
 	unsigned int from, to;
 
+	WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);
+
 	/* If not going from used to unused or vice versa - all done.
 	 * For dodgy entries make sure we try to sync again (queue the entry).
 	 */
@@ -675,6 +678,7 @@ static void
 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+	struct udp_tunnel_nic_shared_node *node;
 	unsigned int i, j;
 
 	/* Freeze all the ports we are already tracking so that the replay
@@ -686,7 +690,12 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 	utn->missed = 0;
 	utn->need_replay = 0;
 
-	udp_tunnel_get_rx_info(dev);
+	if (!info->shared) {
+		udp_tunnel_get_rx_info(dev);
+	} else {
+		list_for_each_entry(node, &info->shared->devices, list)
+			udp_tunnel_get_rx_info(node->dev);
+	}
 
 	for (i = 0; i < utn->n_tables; i++)
 		for (j = 0; j < info->tables[i].n_entries; j++)
@@ -742,20 +751,39 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
 	return NULL;
 }
 
+static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
+{
+	unsigned int i;
+
+	for (i = 0; i < utn->n_tables; i++)
+		kfree(utn->entries[i]);
+	kfree(utn->entries);
+	kfree(utn);
+}
+
 static int udp_tunnel_nic_register(struct net_device *dev)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+	struct udp_tunnel_nic_shared_node *node = NULL;
 	struct udp_tunnel_nic *utn;
 	unsigned int n_tables, i;
 
 	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
 		     UDP_TUNNEL_NIC_MAX_TABLES);
+	/* Expect use count of at most 2 (IPv4, IPv6) per device */
+	BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
+		     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);
 
+	/* Check that the driver info is sane */
 	if (WARN_ON(!info->set_port != !info->unset_port) ||
 	    WARN_ON(!info->set_port == !info->sync_table) ||
 	    WARN_ON(!info->tables[0].n_entries))
 		return -EINVAL;
 
+	if (WARN_ON(info->shared &&
+		    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
+		return -EINVAL;
+
 	n_tables = 1;
 	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
 		if (!info->tables[i].n_entries)
@@ -766,9 +794,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
 			return -EINVAL;
 	}
 
-	utn = udp_tunnel_nic_alloc(info, n_tables);
-	if (!utn)
-		return -ENOMEM;
+	/* Create UDP tunnel state structures */
+	if (info->shared) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
+
+		node->dev = dev;
+	}
+
+	if (info->shared && info->shared->udp_tunnel_nic_info) {
+		utn = info->shared->udp_tunnel_nic_info;
+	} else {
+		utn = udp_tunnel_nic_alloc(info, n_tables);
+		if (!utn) {
+			kfree(node);
+			return -ENOMEM;
+		}
+	}
+
+	if (info->shared) {
+		if (!info->shared->udp_tunnel_nic_info) {
+			INIT_LIST_HEAD(&info->shared->devices);
+			info->shared->udp_tunnel_nic_info = utn;
+		}
+
+		list_add_tail(&node->list, &info->shared->devices);
+	}
 
 	utn->dev = dev;
 	dev_hold(dev);
@@ -783,7 +835,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
 static void
 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-	unsigned int i;
+	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+
+	/* For a shared table remove this dev from the list of sharing devices
+	 * and if there are other devices just detach.
+	 */
+	if (info->shared) {
+		struct udp_tunnel_nic_shared_node *node, *first;
+
+		list_for_each_entry(node, &info->shared->devices, list)
+			if (node->dev == dev)
+				break;
+		if (node->dev != dev)
+			return;
+
+		list_del(&node->list);
+		kfree(node);
+
+		first = list_first_entry_or_null(&info->shared->devices,
+						 typeof(*first), list);
+		if (first) {
+			udp_tunnel_drop_rx_info(dev);
+			utn->dev = first->dev;
+			goto release_dev;
+		}
+
+		info->shared->udp_tunnel_nic_info = NULL;
+	}
 
 	/* Flush before we check work, so we don't waste time adding entries
 	 * from the work which we will boot immediately.
@@ -796,10 +874,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 	if (utn->work_pending)
 		return;
 
-	for (i = 0; i < utn->n_tables; i++)
-		kfree(utn->entries[i]);
-	kfree(utn->entries);
-	kfree(utn);
+	udp_tunnel_nic_free(utn);
+release_dev:
 	dev->udp_tunnel_nic = NULL;
 	dev_put(dev);
 }
-- 
2.26.2


  reply	other threads:[~2020-09-26  0:56 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-26  0:56 [PATCH net-next v2 00/10] udp_tunnel: convert Intel drivers with shared tables Jakub Kicinski
2020-09-26  0:56 ` Jakub Kicinski [this message]
2020-09-26  0:56 ` [PATCH net-next v2 02/10] netdevsim: add warnings on unexpected UDP tunnel port errors Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 03/10] netdevsim: shared UDP tunnel port table support Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 04/10] selftests: net: add a test for shared UDP tunnel info tables Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 05/10] i40e: convert to new udp_tunnel infrastructure Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 06/10] ice: remove unused args from ice_get_open_tunnel_port() Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 07/10] ice: convert to new udp_tunnel infrastructure Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 08/10] netdevsim: support the static IANA VXLAN port flag Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 09/10] selftests: net: add a test for static UDP tunnel ports Jakub Kicinski
2020-09-26  0:56 ` [PATCH net-next v2 10/10] docs: vxlan: add info about device features Jakub Kicinski
2020-09-28 16:23 ` [PATCH net-next v2 00/10] udp_tunnel: convert Intel drivers with shared tables Nguyen, Anthony L
2020-09-28 19:49   ` David Miller
2020-09-28 19:50 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200926005649.3285089-2-kuba@kernel.org \
    --to=kuba@kernel.org \
    --cc=anthony.l.nguyen@intel.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.