All of lore.kernel.org
 help / color / mirror / Atom feed
* [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
@ 2018-04-20  1:04 Amritha Nambiar
  2018-04-20  1:04 ` [net-next PATCH 1/3] net: Refactor XPS for CPUs and " Amritha Nambiar
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Amritha Nambiar @ 2018-04-20  1:04 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, amritha.nambiar, sridhar.samudrala, edumazet,
	hannes, tom

This patch series implements support for Tx queue selection based on
the Rx queue map. This is done by configuring an Rx queue map per Tx
queue using a sysfs attribute. If the user configuration for Rx queues
does not apply, then the Tx queue selection falls back to XPS using
CPUs and finally to hashing.

XPS is refactored to support Tx queue selection based on either the
CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
enabled. By default no receive queues are configured for the Tx queue.

- /sys/class/net/eth0/queues/tx-*/xps_rxqs

This is to enable sending packets on the same Tx-Rx queue pair as this
is useful for busy polling multi-threaded workloads where it is not
possible to pin the threads to a CPU. This is a rework of Sridhar's
patch for symmetric queueing via socket option:
https://www.spinics.net/lists/netdev/msg453106.html

---

Amritha Nambiar (3):
      net: Refactor XPS for CPUs and Rx queues
      net: Enable Tx queue selection based on Rx queues
      net-sysfs: Add interface for Rx queue map per Tx queue


 include/linux/netdevice.h |   82 +++++++++++++++
 include/net/sock.h        |   18 +++
 net/core/dev.c            |  240 +++++++++++++++++++++++++++++++--------------
 net/core/net-sysfs.c      |   85 ++++++++++++++++
 net/core/sock.c           |    5 +
 net/ipv4/tcp_input.c      |    7 +
 net/ipv4/tcp_ipv4.c       |    1 
 net/ipv4/tcp_minisocks.c  |    1 
 8 files changed, 357 insertions(+), 82 deletions(-)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [net-next PATCH 1/3] net: Refactor XPS for CPUs and Rx queues
  2018-04-20  1:04 [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Amritha Nambiar
@ 2018-04-20  1:04 ` Amritha Nambiar
  2018-05-09 20:31   ` Tom Herbert
  2018-04-20  1:04 ` [net-next PATCH 2/3] net: Enable Tx queue selection based on " Amritha Nambiar
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Amritha Nambiar @ 2018-04-20  1:04 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, amritha.nambiar, sridhar.samudrala, edumazet,
	hannes, tom

Refactor XPS code to support Tx queue selection based on
CPU map or Rx queue map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 include/linux/netdevice.h |   82 +++++++++++++++++-
 net/core/dev.c            |  206 +++++++++++++++++++++++++++++----------------
 net/core/net-sysfs.c      |    4 -
 3 files changed, 216 insertions(+), 76 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 14e0777..40a9171 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -730,10 +730,21 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map __rcu *cpu_map[0];
+	struct xps_map __rcu *attr_map[0];
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +	\
 	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+	(_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
+enum xps_map_type {
+	XPS_MAP_RXQS,
+	XPS_MAP_CPUS,
+	__XPS_MAP_MAX
+};
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE	16
@@ -1867,7 +1878,7 @@ struct net_device {
 	int			watchdog_timeo;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_maps;
+	struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -3204,6 +3215,71 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, enum xps_map_type type);
+
+static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
+				  unsigned int nr_bits)
+{
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	WARN_ON_ONCE(j >= nr_bits);
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+	return test_bit(j, mask);
+}
+
+static inline bool attr_test_online(unsigned long j,
+				    const unsigned long *online_mask,
+				    unsigned int nr_bits)
+{
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	WARN_ON_ONCE(j >= nr_bits);
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+	if (online_mask)
+		return test_bit(j, online_mask);
+
+	if (j >= 0 && j < nr_bits)
+		return true;
+
+	return false;
+}
+
+static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
+					 unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1) {
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+		WARN_ON_ONCE(n >= nr_bits);
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+	}
+
+	if (srcp)
+		return find_next_bit(srcp, nr_bits, n + 1);
+
+	return n + 1;
+}
+
+static inline int attrmask_next_and(int n, const unsigned long *src1p,
+				    const unsigned long *src2p,
+				    unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1) {
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+		WARN_ON_ONCE(n >= nr_bits);
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+	}
+
+	if (src1p && src2p)
+		return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+	else if (src1p)
+		return find_next_bit(src1p, nr_bits, n + 1);
+	else if (src2p)
+		return find_next_bit(src2p, nr_bits, n + 1);
+
+	return n + 1;
+}
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      const struct cpumask *mask,
diff --git a/net/core/dev.c b/net/core/dev.c
index a490ef6..17c4883 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[tci]);
+		map = xmap_dereference(dev_maps->attr_map[tci]);
 	if (!map)
 		return false;
 
@@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
-		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
 	}
@@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
+	const unsigned long *possible_mask = NULL;
+	enum xps_map_type type = XPS_MAP_RXQS;
 	struct xps_dev_maps *dev_maps;
-	int cpu, i;
 	bool active = false;
+	unsigned int nr_ids;
+	int i, j;
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
 
-	if (!dev_maps)
-		goto out_no_maps;
+	while (type < __XPS_MAP_MAX) {
+		dev_maps = xmap_dereference(dev->xps_maps[type]);
+		if (!dev_maps)
+			goto out_no_maps;
+
+		if (type == XPS_MAP_CPUS) {
+			if (num_possible_cpus() > 1)
+				possible_mask = cpumask_bits(cpu_possible_mask);
+			nr_ids = nr_cpu_ids;
+		} else if (type == XPS_MAP_RXQS) {
+			nr_ids = dev->num_rx_queues;
+		}
 
-	for_each_possible_cpu(cpu)
-		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-					       offset, count);
+		for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+		     j < nr_ids;)
+			active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+						       count);
+		if (!active) {
+			RCU_INIT_POINTER(dev->xps_maps[type], NULL);
+			kfree_rcu(dev_maps, rcu);
+		}
 
-	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-		kfree_rcu(dev_maps, rcu);
+		if (type == XPS_MAP_CPUS) {
+			for (i = offset + (count - 1); count--; i--)
+				netdev_queue_numa_node_write(
+					netdev_get_tx_queue(dev, i),
+							    NUMA_NO_NODE);
+		}
+out_no_maps:
+		type++;
 	}
 
-	for (i = offset + (count - 1); count--; i--)
-		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-					     NUMA_NO_NODE);
-
-out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 }
 
@@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-				      int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+				      u16 index, enum xps_map_type type)
 {
-	struct xps_map *new_map;
 	int alloc_len = XPS_MIN_MAP_ALLOC;
+	struct xps_map *new_map = NULL;
 	int i, pos;
 
 	for (pos = 0; map && pos < map->len; pos++) {
@@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		return map;
 	}
 
-	/* Need to add queue to this CPU's existing map */
+	/* Need to add tx-queue to this CPU's/rx-queue's existing map */
 	if (map) {
 		if (pos < map->alloc_len)
 			return map;
@@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		alloc_len = map->alloc_len * 2;
 	}
 
-	/* Need to allocate new map to store queue on this CPU's map */
-	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-			       cpu_to_node(cpu));
+	/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+	 *  map
+	 */
+	if (type == XPS_MAP_RXQS)
+		new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+	else if (type == XPS_MAP_CPUS)
+		new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+				       cpu_to_node(attr_index));
 	if (!new_map)
 		return NULL;
 
@@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-			u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, enum xps_map_type type)
 {
+	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, cpu, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
+	unsigned int nr_ids;
 
 	if (dev->num_tc) {
 		num_tc = dev->num_tc;
@@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -EINVAL;
 	}
 
-	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
+	switch (type) {
+	case XPS_MAP_RXQS:
+		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+		dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
+		nr_ids = dev->num_rx_queues;
+		break;
+	case XPS_MAP_CPUS:
+		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+		if (num_possible_cpus() > 1) {
+			online_mask = cpumask_bits(cpu_online_mask);
+			possible_mask = cpumask_bits(cpu_possible_mask);
+		}
+		dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
+		nr_ids = nr_cpu_ids;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	if (maps_sz < L1_CACHE_BYTES)
 		maps_sz = L1_CACHE_BYTES;
 
 	mutex_lock(&xps_map_mutex);
 
-	dev_maps = xmap_dereference(dev->xps_maps);
-
 	/* allocate memory for queue storage */
-	for_each_cpu_and(cpu, cpu_online_mask, mask) {
+	for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
+	     j < nr_ids;) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		tci = cpu * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+		tci = j * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 				 NULL;
 
-		map = expand_xps_map(map, cpu, index);
+		map = expand_xps_map(map, j, index, type);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	for_each_possible_cpu(cpu) {
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* We need to explicitly update tci as prevous loop
 		 * could break out early if dev_maps is NULL.
 		 */
-		tci = cpu * num_tc + tc;
+		tci = j * num_tc + tc;
 
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-			/* add queue to CPU maps */
+		if (attr_test_mask(j, mask, nr_ids) &&
+		    attr_test_online(j, online_mask, nr_ids)) {
+			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
+			if (type == XPS_MAP_CPUS) {
+				if (numa_node_id == -2)
+					numa_node_id = cpu_to_node(j);
+				else if (numa_node_id != cpu_to_node(j))
+					numa_node_id = -1;
+			}
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
 		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 	}
 
-	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	if (type == XPS_MAP_RXQS)
+		rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
+	else if (type == XPS_MAP_CPUS)
+		rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
@@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	active = true;
 
 out_no_new_maps:
-	/* update Tx queue numa node */
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     (numa_node_id >= 0) ? numa_node_id :
-				     NUMA_NO_NODE);
+	if (type == XPS_MAP_CPUS) {
+		/* update Tx queue numa node */
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+					     (numa_node_id >= 0) ?
+					     numa_node_id : NUMA_NO_NODE);
+	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
-	/* removes queue from unused CPUs */
-	for_each_possible_cpu(cpu) {
-		for (i = tc, tci = cpu * num_tc; i--; tci++)
+	/* removes tx-queue from unused CPUs/rx-queues */
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = tc, tci = j * num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+		if (!attr_test_mask(j, mask, nr_ids) ||
+		    !attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 
 	/* free map if not active */
 	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		if (type == XPS_MAP_RXQS)
+			RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
+		else if (type == XPS_MAP_CPUS)
+			RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
@@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	return 0;
 error:
 	/* remove any maps that we added */
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = dev_maps ?
-			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
 				kfree(new_map);
@@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
+{
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
+				     XPS_MAP_CPUS);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
@@ -3400,7 +3464,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	int queue_index = -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
 	if (dev_maps) {
 		unsigned int tci = skb->sender_cpu - 1;
 
@@ -3409,7 +3473,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 			tci += netdev_get_prio_tc_map(dev, skb->priority);
 		}
 
-		map = rcu_dereference(dev_maps->cpu_map[tci]);
+		map = rcu_dereference(dev_maps->attr_map[tci]);
 		if (map) {
 			if (map->len == 1)
 				queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c476f07..d7abd33 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	}
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
 	if (dev_maps) {
 		for_each_possible_cpu(cpu) {
 			int i, tci = cpu * num_tc + tc;
 			struct xps_map *map;
 
-			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			map = rcu_dereference(dev_maps->attr_map[tci]);
 			if (!map)
 				continue;
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [net-next PATCH 2/3] net: Enable Tx queue selection based on Rx queues
  2018-04-20  1:04 [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Amritha Nambiar
  2018-04-20  1:04 ` [net-next PATCH 1/3] net: Refactor XPS for CPUs and " Amritha Nambiar
@ 2018-04-20  1:04 ` Amritha Nambiar
  2018-04-20  1:04 ` [net-next PATCH 3/3] net-sysfs: Add interface for Rx queue map per Tx queue Amritha Nambiar
  2018-04-20  2:41 ` [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Eric Dumazet
  3 siblings, 0 replies; 14+ messages in thread
From: Amritha Nambiar @ 2018-04-20  1:04 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, amritha.nambiar, sridhar.samudrala, edumazet,
	hannes, tom

This patch adds support to pick Tx queue based on the Rx queue map
configuration set by the admin through the sysfs attribute
for each Tx queue. If the user configuration for receive
queue map does not apply, then the Tx queue selection falls back
to CPU map based selection and finally to hashing.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 include/net/sock.h       |   18 ++++++++++++++++++
 net/core/dev.c           |   36 +++++++++++++++++++++++++++++-------
 net/core/sock.c          |    5 +++++
 net/ipv4/tcp_input.c     |    7 +++++++
 net/ipv4/tcp_ipv4.c      |    1 +
 net/ipv4/tcp_minisocks.c |    1 +
 6 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 74d725f..f10b2a2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -139,6 +139,8 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_rx_queue_mapping: rx queue number for this connection
+ *	@skc_rx_ifindex: rx ifindex for this connection
  *	@skc_flags: place holder for sk_flags
  *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -215,6 +217,10 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	int			skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	int			skc_rx_queue_mapping;
+	int			skc_rx_ifindex;
+#endif
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -326,6 +332,10 @@ struct sock {
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping	__sk_common.skc_rx_queue_mapping
+#define sk_rx_ifindex		__sk_common.skc_rx_ifindex
+#endif
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
@@ -1691,6 +1701,14 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
+static inline void sk_mark_rx_queue(struct sock *sk, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	sk->sk_rx_ifindex = skb->skb_iif;
+	sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
+#endif
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 17c4883..cf24d47 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3456,18 +3456,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
 #ifdef CONFIG_XPS
-	struct xps_dev_maps *dev_maps;
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+			       struct xps_dev_maps *dev_maps, unsigned int tci)
+{
 	struct xps_map *map;
 	int queue_index = -1;
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
 	if (dev_maps) {
-		unsigned int tci = skb->sender_cpu - 1;
-
 		if (dev->num_tc) {
 			tci *= dev->num_tc;
 			tci += netdev_get_prio_tc_map(dev, skb->priority);
@@ -3484,6 +3480,32 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 				queue_index = -1;
 		}
 	}
+	return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	enum xps_map_type i = XPS_MAP_RXQS;
+	struct xps_dev_maps *dev_maps;
+	struct sock *sk = skb->sk;
+	int queue_index = -1;
+	unsigned int tci = 0;
+
+	if (sk && sk->sk_rx_queue_mapping <= dev->real_num_rx_queues &&
+	    dev->ifindex == sk->sk_rx_ifindex)
+		tci = sk->sk_rx_queue_mapping;
+
+	rcu_read_lock();
+	while (queue_index < 0 && i < __XPS_MAP_MAX) {
+		if (i == XPS_MAP_CPUS)
+			tci = skb->sender_cpu - 1;
+		dev_maps = rcu_dereference(dev->xps_maps[i]);
+		queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci);
+		i++;
+	}
+
 	rcu_read_unlock();
 
 	return queue_index;
diff --git a/net/core/sock.c b/net/core/sock.c
index b2c3db1..f7a4b46 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2820,6 +2820,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0U;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
+
+#ifdef CONFIG_XPS
+	sk->sk_rx_ifindex = -1;
+	sk->sk_rx_queue_mapping = -1;
+#endif
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0396fb9..157f401 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -5535,6 +5536,11 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		__tcp_fast_path_on(tp, tp->snd_wnd);
 	else
 		tp->pred_flags = 0;
+
+	if (skb) {
+		sk_mark_napi_id(sk, skb);
+		sk_mark_rx_queue(sk, skb);
+	}
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -6347,6 +6353,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
+	sk_mark_rx_queue(req_to_sk(req), skb);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f70586b..132d9af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1467,6 +1467,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
+		sk_mark_rx_queue(sk, skb);
 		if (dst) {
 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
 			    !dst->ops->check(dst, 0)) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 57b5468..c18d6f2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -835,6 +835,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 
 	/* record NAPI ID of child */
 	sk_mark_napi_id(child, skb);
+	sk_mark_rx_queue(child, skb);
 
 	tcp_segs_in(tcp_sk(child), skb);
 	if (!sock_owned_by_user(child)) {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [net-next PATCH 3/3] net-sysfs: Add interface for Rx queue map per Tx queue
  2018-04-20  1:04 [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Amritha Nambiar
  2018-04-20  1:04 ` [net-next PATCH 1/3] net: Refactor XPS for CPUs and " Amritha Nambiar
  2018-04-20  1:04 ` [net-next PATCH 2/3] net: Enable Tx queue selection based on " Amritha Nambiar
@ 2018-04-20  1:04 ` Amritha Nambiar
  2018-04-20  2:41 ` [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Eric Dumazet
  3 siblings, 0 replies; 14+ messages in thread
From: Amritha Nambiar @ 2018-04-20  1:04 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, amritha.nambiar, sridhar.samudrala, edumazet,
	hannes, tom

Extend transmit queue sysfs attribute to configure Rx queue map
per Tx queue. By default no receive queues are configured for the
Tx queue.

- /sys/class/net/eth0/queues/tx-*/xps_rxqs

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 net/core/net-sysfs.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d7abd33..0654243 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1283,6 +1283,86 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 
 static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 	= __ATTR_RW(xps_cpus);
+
+static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	unsigned long *mask, index;
+	int j, len, num_tc = 1, tc = 0;
+
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_RXQS]);
+	if (dev_maps) {
+		for (j = -1; j = attrmask_next(j, NULL, dev->num_rx_queues),
+		     j < dev->num_rx_queues;) {
+			int i, tci = j * num_tc + tc;
+			struct xps_map *map;
+
+			map = rcu_dereference(dev_maps->attr_map[tci]);
+			if (!map)
+				continue;
+
+			for (i = map->len; i--;) {
+				if (map->queues[i] == index) {
+					set_bit(j, mask);
+					break;
+				}
+			}
+		}
+	}
+
+	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
+	rcu_read_unlock();
+	kfree(mask);
+
+	return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+			      size_t len)
+{
+	struct net_device *dev = queue->dev;
+	unsigned long *mask, index;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
+	if (err) {
+		kfree(mask);
+		return err;
+	}
+
+	err = __netif_set_xps_queue(dev, mask, index, XPS_MAP_RXQS);
+	kfree(mask);
+	return err ? : len;
+}
+
+static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
+	= __ATTR_RW(xps_rxqs);
 #endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
@@ -1290,6 +1370,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
 	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+	&xps_rxqs_attribute.attr,
 	&queue_tx_maxrate.attr,
 #endif
 	NULL

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-04-20  1:04 [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Amritha Nambiar
                   ` (2 preceding siblings ...)
  2018-04-20  1:04 ` [net-next PATCH 3/3] net-sysfs: Add interface for Rx queue map per Tx queue Amritha Nambiar
@ 2018-04-20  2:41 ` Eric Dumazet
  2018-05-08 15:15   ` Tom Herbert
  3 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2018-04-20  2:41 UTC (permalink / raw)
  To: amritha.nambiar
  Cc: netdev, David Miller, Alexander Duyck, Samudrala, Sridhar,
	Hannes Frederic Sowa, Tom Herbert

On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
wrote:

> This patch series implements support for Tx queue selection based on
> Rx queue map. This is done by configuring Rx queue map per Tx-queue
> using sysfs attribute. If the user configuration for Rx queues does
> not apply, then the Tx queue selection falls back to XPS using CPUs and
> finally to hashing.

> XPS is refactored to support Tx queue selection based on either the
> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
> enabled. By default no receive queues are configured for the Tx queue.

> - /sys/class/net/eth0/queues/tx-*/xps_rxqs

> This is to enable sending packets on the same Tx-Rx queue pair as this
> is useful for busy polling multi-threaded workloads where it is not
> possible to pin the threads to a CPU. This is a rework of Sridhar's
> patch for symmetric queueing via socket option:
> https://www.spinics.net/lists/netdev/msg453106.html

> ---

> Amritha Nambiar (3):
>        net: Refactor XPS for CPUs and Rx queues
>        net: Enable Tx queue selection based on Rx queues
>        net-sysfs: Add interface for Rx queue map per Tx queue


>   include/linux/netdevice.h |   82 +++++++++++++++
>   include/net/sock.h        |   18 +++
>   net/core/dev.c            |  240
+++++++++++++++++++++++++++++++--------------
>   net/core/net-sysfs.c      |   85 ++++++++++++++++
>   net/core/sock.c           |    5 +
>   net/ipv4/tcp_input.c      |    7 +
>   net/ipv4/tcp_ipv4.c       |    1
>   net/ipv4/tcp_minisocks.c  |    1
>   8 files changed, 357 insertions(+), 82 deletions(-)


Without clear documentation (for example in
Documentation/networking/scaling.txt), I really do not understand what
problem you want to solve, and why we need ~300 additional LOC in the
kernel.

Referring to an old thread (
https://www.spinics.net/lists/netdev/msg453106.html ) is not the way to go.

Sorry :/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-04-20  2:41 ` [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Eric Dumazet
@ 2018-05-08 15:15   ` Tom Herbert
  2018-05-08 16:02     ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Tom Herbert @ 2018-05-08 15:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: amritha.nambiar, netdev, David Miller, Alexander Duyck,
	Samudrala, Sridhar, Hannes Frederic Sowa

On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
> wrote:
>
>> This patch series implements support for Tx queue selection based on
>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>> using sysfs attribute. If the user configuration for Rx queues does
>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>> finally to hashing.
>
>> XPS is refactored to support Tx queue selection based on either the
>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>> enabled. By default no receive queues are configured for the Tx queue.
>
>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>
>> This is to enable sending packets on the same Tx-Rx queue pair as this
>> is useful for busy polling multi-threaded workloads where it is not
>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>> patch for symmetric queueing via socket option:
>> https://www.spinics.net/lists/netdev/msg453106.html
>
I suspect this is an artifact of flow director which I believe
required queue pairs to be able to work (i.e. the receive queue chosen by
hardware is determined by the send queue). But that was only required because
of hardware design, I don't see the rationale for introducing queue
pairs in the software stack. There's no need to correlate the transmit
path with receive path, no need to enforce a 1-1 mapping between RX
and TX queues, and the OOO mitigations should be sufficient when TX
queue changes for a flow.

Tom

>> ---
>
>> Amritha Nambiar (3):
>>        net: Refactor XPS for CPUs and Rx queues
>>        net: Enable Tx queue selection based on Rx queues
>>        net-sysfs: Add interface for Rx queue map per Tx queue
>
>
>>   include/linux/netdevice.h |   82 +++++++++++++++
>>   include/net/sock.h        |   18 +++
>>   net/core/dev.c            |  240
> +++++++++++++++++++++++++++++++--------------
>>   net/core/net-sysfs.c      |   85 ++++++++++++++++
>>   net/core/sock.c           |    5 +
>>   net/ipv4/tcp_input.c      |    7 +
>>   net/ipv4/tcp_ipv4.c       |    1
>>   net/ipv4/tcp_minisocks.c  |    1
>>   8 files changed, 357 insertions(+), 82 deletions(-)
>
>
> Without a clear documentation (for example in
> Documentation/networking/scaling.txt)
> , I really do not understand what problem you want to solve, and why we
> need ~300 additional LOC in kernel.
>
> Referring to an old thread (
> https://www.spinics.net/lists/netdev/msg453106.html ) is not the way to go.
>
> Sorry :/

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-05-08 15:15   ` Tom Herbert
@ 2018-05-08 16:02     ` Alexander Duyck
  2018-05-08 17:07       ` Eric Dumazet
  0 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2018-05-08 16:02 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Eric Dumazet, Amritha Nambiar, netdev, David Miller,
	Alexander Duyck, Samudrala, Sridhar, Hannes Frederic Sowa

On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>> wrote:
>>
>>> This patch series implements support for Tx queue selection based on
>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>> using sysfs attribute. If the user configuration for Rx queues does
>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>> finally to hashing.
>>
>>> XPS is refactored to support Tx queue selection based on either the
>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>> enabled. By default no receive queues are configured for the Tx queue.
>>
>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>
>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>> is useful for busy polling multi-threaded workloads where it is not
>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>> patch for symmetric queueing via socket option:
>>> https://www.spinics.net/lists/netdev/msg453106.html
>>
> I suspect this is an artifact of flow director which I believe
> required queue pairs to be able to work (i.e. receive queue chose
> hardware is determined send queue). But that was only required because
> of hardware design, I don't see the rationale for introducing queue
> pairs in the software stack. There's no need to correlate the transmit
> path with receive path, no need to enforce a 1-1 mapping between RX
> and TX queues, and the OOO mitigations should be sufficient when TX
> queue changes for a flow.
>
> Tom

If I am not mistaken I think there are benefits to doing this sort of
thing with polling as it keeps the Tx work locked into the same queue
pair that a given application is polling on. So as a result you can
keep the interrupts contained to the queue pair that is being busy
polled on and if the application cleans up the packets during the busy
poll it ends up being a net savings in terms of both latency and power
since the Tx clean-up happens sooner, and it happens on the queue that
is already busy polling instead of possibly triggering an interrupt on
another CPU.

So for example in the case of routing and bridging workloads we
already had code that would take the Rx queue and associate it to a Tx
queue. One of the ideas behind doing this is to try and keep the CPU
overhead low by having a 1:1 mapping. In the case of this code we
allow for a little more flexibility in that you could have
many-to-many mappings but the general idea and common use case is the
same which is a 1:1 mapping.

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-05-08 16:02     ` Alexander Duyck
@ 2018-05-08 17:07       ` Eric Dumazet
  2018-05-08 17:19         ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2018-05-08 17:07 UTC (permalink / raw)
  To: Alexander Duyck, Tom Herbert
  Cc: Eric Dumazet, Amritha Nambiar, netdev, David Miller,
	Alexander Duyck, Samudrala, Sridhar, Hannes Frederic Sowa



On 05/08/2018 09:02 AM, Alexander Duyck wrote:
> On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
>> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>>> wrote:
>>>
>>>> This patch series implements support for Tx queue selection based on
>>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>>> using sysfs attribute. If the user configuration for Rx queues does
>>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>>> finally to hashing.
>>>
>>>> XPS is refactored to support Tx queue selection based on either the
>>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>>> enabled. By default no receive queues are configured for the Tx queue.
>>>
>>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>>
>>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>>> is useful for busy polling multi-threaded workloads where it is not
>>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>>> patch for symmetric queueing via socket option:
>>>> https://www.spinics.net/lists/netdev/msg453106.html
>>>
>> I suspect this is an artifact of flow director which I believe
>> required queue pairs to be able to work (i.e. receive queue chose
>> hardware is determined send queue). But that was only required because
>> of hardware design, I don't see the rationale for introducing queue
>> pairs in the software stack. There's no need to correlate the transmit
>> path with receive path, no need to enforce a 1-1 mapping between RX
>> and TX queues, and the OOO mitigations should be sufficient when TX
>> queue changes for a flow.
>>
>> Tom
> 
> If I am not mistaken I think there are benefits to doing this sort of
> thing with polling as it keeps the Tx work locked into the same queue
> pair that a given application is polling on. So as a result you can
> keep the interrupts contained to the queue pair that is being busy
> polled on and if the application cleans up the packets during the busy
> poll it ends up being a net savings in terms of both latency and power
> since the Tx clean-up happens sooner, and it happens on the queue that
> is already busy polling instead of possibly triggering an interrupt on
> another CPU.
> 
> So for example in the case of routing and bridging workloads we
> already had code that would take the Rx queue and associate it to a Tx
> queue. One of the ideas behind doing this is to try and keep the CPU
> overhead low by having a 1:1 mapping. In the case of this code we
> allow for a little more flexibility in that you could have
> many-to-many mappings but the general idea and common use case is the
> same which is a 1:1 mapping.


I thought we had everything in place to be able to have this already.

Setting IRQ affinities and XPS is certainly something doable.

This is why I wanted proper documentation of yet another way to reach the
same behavior.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-05-08 17:07       ` Eric Dumazet
@ 2018-05-08 17:19         ` Alexander Duyck
  2018-05-08 17:56           ` Nambiar, Amritha
  0 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2018-05-08 17:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, Eric Dumazet, Amritha Nambiar, netdev, David Miller,
	Alexander Duyck, Samudrala, Sridhar, Hannes Frederic Sowa

On Tue, May 8, 2018 at 10:07 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 05/08/2018 09:02 AM, Alexander Duyck wrote:
>> On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
>>> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>>>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>>>> wrote:
>>>>
>>>>> This patch series implements support for Tx queue selection based on
>>>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>>>> using sysfs attribute. If the user configuration for Rx queues does
>>>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>>>> finally to hashing.
>>>>
>>>>> XPS is refactored to support Tx queue selection based on either the
>>>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>>>> enabled. By default no receive queues are configured for the Tx queue.
>>>>
>>>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>>>
>>>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>>>> is useful for busy polling multi-threaded workloads where it is not
>>>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>>>> patch for symmetric queueing via socket option:
>>>>> https://www.spinics.net/lists/netdev/msg453106.html
>>>>
>>> I suspect this is an artifact of flow director which I believe
>>> required queue pairs to be able to work (i.e. receive queue chose
>>> hardware is determined send queue). But that was only required because
>>> of hardware design, I don't see the rationale for introducing queue
>>> pairs in the software stack. There's no need to correlate the transmit
>>> path with receive path, no need to enforce a 1-1 mapping between RX
>>> and TX queues, and the OOO mitigations should be sufficient when TX
>>> queue changes for a flow.
>>>
>>> Tom
>>
>> If I am not mistaken I think there are benefits to doing this sort of
>> thing with polling as it keeps the Tx work locked into the same queue
>> pair that a given application is polling on. So as a result you can
>> keep the interrupts contained to the queue pair that is being busy
>> polled on and if the application cleans up the packets during the busy
>> poll it ends up being a net savings in terms of both latency and power
>> since the Tx clean-up happens sooner, and it happens on the queue that
>> is already busy polling instead of possibly triggering an interrupt on
>> another CPU.
>>
>> So for example in the case of routing and bridging workloads we
>> already had code that would take the Rx queue and associate it to a Tx
>> queue. One of the ideas behind doing this is to try and keep the CPU
>> overhead low by having a 1:1 mapping. In the case of this code we
>> allow for a little more flexibility in that you could have
>> many-to-many mappings but the general idea and common use case is the
>> same which is a 1:1 mapping.
>
>
> I thought we had everything in place to be able to have this already.
>
> Setting IRQ affinities and XPS is certainly something doable.
>
> This is why I wanted a proper documentation of yet another way to reach the
> same behavior.

IRQ affinities and XPS work for pure NAPI setups, but the problem is
you have to also do application affinity in the case of busy polling
which can provide some additional challenges since then you have to
add code in your application to associate a given queue/CPU to a given
application thread. I believe this is a way of simplifying this.

I agree on the documentation aspect. The usage of this should be well
documented, as well as the rationale for using it.

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
  2018-05-08 17:19         ` Alexander Duyck
@ 2018-05-08 17:56           ` Nambiar, Amritha
  0 siblings, 0 replies; 14+ messages in thread
From: Nambiar, Amritha @ 2018-05-08 17:56 UTC (permalink / raw)
  To: Alexander Duyck, Eric Dumazet
  Cc: Tom Herbert, Eric Dumazet, netdev, David Miller, Alexander Duyck,
	Samudrala, Sridhar, Hannes Frederic Sowa

On 5/8/2018 10:19 AM, Alexander Duyck wrote:
> On Tue, May 8, 2018 at 10:07 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>>
>> On 05/08/2018 09:02 AM, Alexander Duyck wrote:
>>> On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
>>>> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>>>>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>>>>> wrote:
>>>>>
>>>>>> This patch series implements support for Tx queue selection based on
>>>>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>>>>> using sysfs attribute. If the user configuration for Rx queues does
>>>>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>>>>> finally to hashing.
>>>>>
>>>>>> XPS is refactored to support Tx queue selection based on either the
>>>>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>>>>> enabled. By default no receive queues are configured for the Tx queue.
>>>>>
>>>>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>>>>
>>>>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>>>>> is useful for busy polling multi-threaded workloads where it is not
>>>>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>>>>> patch for symmetric queueing via socket option:
>>>>>> https://www.spinics.net/lists/netdev/msg453106.html
>>>>>
>>>> I suspect this is an artifact of flow director which I believe
>>>> required queue pairs to be able to work (i.e. receive queue chose
>>>> hardware is determined send queue). But that was only required because
>>>> of hardware design, I don't see the rationale for introducing queue
>>>> pairs in the software stack. There's no need to correlate the transmit
>>>> path with receive path, no need to enforce a 1-1 mapping between RX
>>>> and TX queues, and the OOO mitigations should be sufficient when TX
>>>> queue changes for a flow.
>>>>
>>>> Tom
>>>
>>> If I am not mistaken I think there are benefits to doing this sort of
>>> thing with polling as it keeps the Tx work locked into the same queue
>>> pair that a given application is polling on. So as a result you can
>>> keep the interrupts contained to the queue pair that is being busy
>>> polled on and if the application cleans up the packets during the busy
>>> poll it ends up being a net savings in terms of both latency and power
>>> since the Tx clean-up happens sooner, and it happens on the queue that
>>> is already busy polling instead of possibly triggering an interrupt on
>>> another CPU.
>>>
>>> So for example in the case of routing and bridging workloads we
>>> already had code that would take the Rx queue and associate it to a Tx
>>> queue. One of the ideas behind doing this is to try and keep the CPU
>>> overhead low by having a 1:1 mapping. In the case of this code we
>>> allow for a little more flexibility in that you could have
>>> many-to-many mappings but the general idea and common use case is the
>>> same which is a 1:1 mapping.
>>
>>
>> I thought we had everything in place to be able to have this already.
>>
>> Setting IRQ affinities and XPS is certainly something doable.
>>
>> This is why I wanted a proper documentation of yet another way to reach the
>> same behavior.
> 
> IRQ affinities and XPS work for pure NAPI setups, but the problem is
> you have to also do application affinity in the case of busy polling
> which can provide some additional challenges since then you have to
> add code in your application to associate a given queue/CPU to a given
> application thread. I believe this is a way of simplifying this.
> 
> I agree on the documentation aspect. The usage of this should be well
> documented as well as the why of using it.
> 
> - Alex
> 
I'll submit another version of the series with the documentation added.

- Amritha

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 1/3] net: Refactor XPS for CPUs and Rx queues
  2018-04-20  1:04 ` [net-next PATCH 1/3] net: Refactor XPS for CPUs and " Amritha Nambiar
@ 2018-05-09 20:31   ` Tom Herbert
  2018-05-09 20:54     ` Nambiar, Amritha
  0 siblings, 1 reply; 14+ messages in thread
From: Tom Herbert @ 2018-05-09 20:31 UTC (permalink / raw)
  To: Amritha Nambiar
  Cc: Linux Kernel Network Developers, David S. Miller,
	Alexander Duyck, Sridhar Samudrala, Eric Dumazet,
	Hannes Frederic Sowa

On Thu, Apr 19, 2018 at 6:04 PM, Amritha Nambiar
<amritha.nambiar@intel.com> wrote:
> Refactor XPS code to support Tx queue selection based on
> CPU map or Rx queue map.
>
> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
> ---
>  include/linux/netdevice.h |   82 +++++++++++++++++-
>  net/core/dev.c            |  206 +++++++++++++++++++++++++++++----------------
>  net/core/net-sysfs.c      |    4 -
>  3 files changed, 216 insertions(+), 76 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 14e0777..40a9171 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -730,10 +730,21 @@ struct xps_map {
>   */
>  struct xps_dev_maps {
>         struct rcu_head rcu;
> -       struct xps_map __rcu *cpu_map[0];
> +       struct xps_map __rcu *attr_map[0];

This seems unnecessarily complicated to me. Why not just add another
map called something like "rxq2txq_map". Then when selecting TXQ just
check the new map first and then the normal cpu_map if there's not a
hit.

>  };
> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
> +
> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
> +
> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
> +
> +enum xps_map_type {
> +       XPS_MAP_RXQS,
> +       XPS_MAP_CPUS,
> +       __XPS_MAP_MAX
> +};
> +
>  #endif /* CONFIG_XPS */
>
>  #define TC_MAX_QUEUE   16
> @@ -1867,7 +1878,7 @@ struct net_device {
>         int                     watchdog_timeo;
>
>  #ifdef CONFIG_XPS
> -       struct xps_dev_maps __rcu *xps_maps;
> +       struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
>  #endif
>  #ifdef CONFIG_NET_CLS_ACT
>         struct mini_Qdisc __rcu *miniq_egress;
> @@ -3204,6 +3215,71 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>  #ifdef CONFIG_XPS
>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         u16 index);
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> +                         u16 index, enum xps_map_type type);
> +
> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
> +                                 unsigned int nr_bits)
> +{
> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> +       WARN_ON_ONCE(j >= nr_bits);
> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */

This #ifdef block appears 3 times in the patch. Seems like it should
be replace by simple macro.

> +       return test_bit(j, mask);
> +}
> +
> +static inline bool attr_test_online(unsigned long j,
> +                                   const unsigned long *online_mask,
> +                                   unsigned int nr_bits)
> +{
> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> +       WARN_ON_ONCE(j >= nr_bits);
> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> +
> +       if (online_mask)
> +               return test_bit(j, online_mask);
> +
> +       if (j >= 0 && j < nr_bits)
> +               return true;
> +
> +       return false;
> +}
> +
> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
> +                                        unsigned int nr_bits)
> +{
> +       /* -1 is a legal arg here. */
> +       if (n != -1) {
> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> +               WARN_ON_ONCE(n >= nr_bits);
> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> +       }
> +
> +       if (srcp)
> +               return find_next_bit(srcp, nr_bits, n + 1);
> +
> +       return n + 1;
> +}
> +
> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
> +                                   const unsigned long *src2p,
> +                                   unsigned int nr_bits)
> +{
> +       /* -1 is a legal arg here. */
> +       if (n != -1) {
> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
> +               WARN_ON_ONCE(n >= nr_bits);
> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> +       }
> +
> +       if (src1p && src2p)
> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
> +       else if (src1p)
> +               return find_next_bit(src1p, nr_bits, n + 1);
> +       else if (src2p)
> +               return find_next_bit(src2p, nr_bits, n + 1);
> +
> +       return n + 1;
> +}
>  #else
>  static inline int netif_set_xps_queue(struct net_device *dev,
>                                       const struct cpumask *mask,
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a490ef6..17c4883 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>         int pos;
>
>         if (dev_maps)
> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>         if (!map)
>                 return false;
>
> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>                         break;
>                 }
>
> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>                 kfree_rcu(map, rcu);
>                 return false;
>         }
> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>                                    u16 count)
>  {
> +       const unsigned long *possible_mask = NULL;
> +       enum xps_map_type type = XPS_MAP_RXQS;
>         struct xps_dev_maps *dev_maps;
> -       int cpu, i;
>         bool active = false;
> +       unsigned int nr_ids;
> +       int i, j;
>
>         mutex_lock(&xps_map_mutex);
> -       dev_maps = xmap_dereference(dev->xps_maps);
>
> -       if (!dev_maps)
> -               goto out_no_maps;
> +       while (type < __XPS_MAP_MAX) {
> +               dev_maps = xmap_dereference(dev->xps_maps[type]);
> +               if (!dev_maps)
> +                       goto out_no_maps;
> +
> +               if (type == XPS_MAP_CPUS) {
> +                       if (num_possible_cpus() > 1)
> +                               possible_mask = cpumask_bits(cpu_possible_mask);
> +                       nr_ids = nr_cpu_ids;
> +               } else if (type == XPS_MAP_RXQS) {
> +                       nr_ids = dev->num_rx_queues;
> +               }
>
> -       for_each_possible_cpu(cpu)
> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
> -                                              offset, count);
> +               for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +                    j < nr_ids;)
> +                       active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
> +                                                      count);
> +               if (!active) {
> +                       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
> +                       kfree_rcu(dev_maps, rcu);
> +               }
>
> -       if (!active) {
> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
> -               kfree_rcu(dev_maps, rcu);
> +               if (type == XPS_MAP_CPUS) {
> +                       for (i = offset + (count - 1); count--; i--)
> +                               netdev_queue_numa_node_write(
> +                                       netdev_get_tx_queue(dev, i),
> +                                                           NUMA_NO_NODE);
> +               }
> +out_no_maps:
> +               type++;
>         }
>
> -       for (i = offset + (count - 1); count--; i--)
> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
> -                                            NUMA_NO_NODE);
> -
> -out_no_maps:
>         mutex_unlock(&xps_map_mutex);
>  }
>
> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>  }
>
> -static struct xps_map *expand_xps_map(struct xps_map *map,
> -                                     int cpu, u16 index)
> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
> +                                     u16 index, enum xps_map_type type)
>  {
> -       struct xps_map *new_map;
>         int alloc_len = XPS_MIN_MAP_ALLOC;
> +       struct xps_map *new_map = NULL;
>         int i, pos;
>
>         for (pos = 0; map && pos < map->len; pos++) {
> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>                 return map;
>         }
>
> -       /* Need to add queue to this CPU's existing map */
> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>         if (map) {
>                 if (pos < map->alloc_len)
>                         return map;
> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>                 alloc_len = map->alloc_len * 2;
>         }
>
> -       /* Need to allocate new map to store queue on this CPU's map */
> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> -                              cpu_to_node(cpu));
> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
> +        *  map
> +        */
> +       if (type == XPS_MAP_RXQS)
> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
> +       else if (type == XPS_MAP_CPUS)
> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> +                                      cpu_to_node(attr_index));
>         if (!new_map)
>                 return NULL;
>
> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>         return new_map;
>  }
>
> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> -                       u16 index)
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> +                         u16 index, enum xps_map_type type)
>  {
> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
> -       int i, cpu, tci, numa_node_id = -2;
> +       int i, j, tci, numa_node_id = -2;
>         int maps_sz, num_tc = 1, tc = 0;
>         struct xps_map *map, *new_map;
>         bool active = false;
> +       unsigned int nr_ids;
>
>         if (dev->num_tc) {
>                 num_tc = dev->num_tc;
> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         return -EINVAL;
>         }
>
> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
> +       switch (type) {
> +       case XPS_MAP_RXQS:
> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
> +               nr_ids = dev->num_rx_queues;
> +               break;
> +       case XPS_MAP_CPUS:
> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
> +               if (num_possible_cpus() > 1) {
> +                       online_mask = cpumask_bits(cpu_online_mask);
> +                       possible_mask = cpumask_bits(cpu_possible_mask);
> +               }
> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
> +               nr_ids = nr_cpu_ids;
> +               break;
> +       default:
> +               return -EINVAL;
> +       }
> +
>         if (maps_sz < L1_CACHE_BYTES)
>                 maps_sz = L1_CACHE_BYTES;
>
>         mutex_lock(&xps_map_mutex);
>
> -       dev_maps = xmap_dereference(dev->xps_maps);
> -
>         /* allocate memory for queue storage */
> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
> +            j < nr_ids;) {
>                 if (!new_dev_maps)
>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>                 if (!new_dev_maps) {
> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>                         return -ENOMEM;
>                 }
>
> -               tci = cpu * num_tc + tc;
> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
> +               tci = j * num_tc + tc;
> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>                                  NULL;
>
> -               map = expand_xps_map(map, cpu, index);
> +               map = expand_xps_map(map, j, index, type);
>                 if (!map)
>                         goto error;
>
> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>         }
>
>         if (!new_dev_maps)
>                 goto out_no_new_maps;
>
> -       for_each_possible_cpu(cpu) {
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
>                 /* copy maps belonging to foreign traffic classes */
> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>
>                 /* We need to explicitly update tci as prevous loop
>                  * could break out early if dev_maps is NULL.
>                  */
> -               tci = cpu * num_tc + tc;
> +               tci = j * num_tc + tc;
>
> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
> -                       /* add queue to CPU maps */
> +               if (attr_test_mask(j, mask, nr_ids) &&
> +                   attr_test_online(j, online_mask, nr_ids)) {
> +                       /* add tx-queue to CPU/rx-queue maps */
>                         int pos = 0;
>
> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>                         while ((pos < map->len) && (map->queues[pos] != index))
>                                 pos++;
>
>                         if (pos == map->len)
>                                 map->queues[map->len++] = index;
>  #ifdef CONFIG_NUMA
> -                       if (numa_node_id == -2)
> -                               numa_node_id = cpu_to_node(cpu);
> -                       else if (numa_node_id != cpu_to_node(cpu))
> -                               numa_node_id = -1;
> +                       if (type == XPS_MAP_CPUS) {
> +                               if (numa_node_id == -2)
> +                                       numa_node_id = cpu_to_node(j);
> +                               else if (numa_node_id != cpu_to_node(j))
> +                                       numa_node_id = -1;
> +                       }
>  #endif
>                 } else if (dev_maps) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>
>                 /* copy maps belonging to foreign traffic classes */
>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>                         /* fill in the new device map from the old device map */
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>                 }
>         }
>
> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
> +       if (type == XPS_MAP_RXQS)
> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
> +       else if (type == XPS_MAP_CPUS)
> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>
>         /* Cleanup old maps */
>         if (!dev_maps)
>                 goto out_no_old_maps;
>
> -       for_each_possible_cpu(cpu) {
> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>                         if (map && map != new_map)
>                                 kfree_rcu(map, rcu);
>                 }
> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         active = true;
>
>  out_no_new_maps:
> -       /* update Tx queue numa node */
> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> -                                    (numa_node_id >= 0) ? numa_node_id :
> -                                    NUMA_NO_NODE);
> +       if (type == XPS_MAP_CPUS) {
> +               /* update Tx queue numa node */
> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> +                                            (numa_node_id >= 0) ?
> +                                            numa_node_id : NUMA_NO_NODE);
> +       }
>
>         if (!dev_maps)
>                 goto out_no_maps;
>
> -       /* removes queue from unused CPUs */
> -       for_each_possible_cpu(cpu) {
> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
> +       /* removes tx-queue from unused CPUs/rx-queues */
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = tc, tci = j * num_tc; i--; tci++)
>                         active |= remove_xps_queue(dev_maps, tci, index);
> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
> +               if (!attr_test_mask(j, mask, nr_ids) ||
> +                   !attr_test_online(j, online_mask, nr_ids))
>                         active |= remove_xps_queue(dev_maps, tci, index);
>                 for (i = num_tc - tc, tci++; --i; tci++)
>                         active |= remove_xps_queue(dev_maps, tci, index);
> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>
>         /* free map if not active */
>         if (!active) {
> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
> +               if (type == XPS_MAP_RXQS)
> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
> +               else if (type == XPS_MAP_CPUS)
> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
>                 kfree_rcu(dev_maps, rcu);
>         }
>
> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         return 0;
>  error:
>         /* remove any maps that we added */
> -       for_each_possible_cpu(cpu) {
> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> +            j < nr_ids;) {
> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>                         map = dev_maps ?
> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>                               NULL;
>                         if (new_map && new_map != map)
>                                 kfree(new_map);
> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>         kfree(new_dev_maps);
>         return -ENOMEM;
>  }
> +
> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> +                       u16 index)
> +{
> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
> +                                    XPS_MAP_CPUS);
> +}
>  EXPORT_SYMBOL(netif_set_xps_queue);
>
>  #endif
> @@ -3400,7 +3464,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>         int queue_index = -1;
>
>         rcu_read_lock();
> -       dev_maps = rcu_dereference(dev->xps_maps);
> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>         if (dev_maps) {
>                 unsigned int tci = skb->sender_cpu - 1;
>
> @@ -3409,7 +3473,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>                 }
>
> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>                 if (map) {
>                         if (map->len == 1)
>                                 queue_index = map->queues[0];
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index c476f07..d7abd33 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>         }
>
>         rcu_read_lock();
> -       dev_maps = rcu_dereference(dev->xps_maps);
> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>         if (dev_maps) {
>                 for_each_possible_cpu(cpu) {
>                         int i, tci = cpu * num_tc + tc;
>                         struct xps_map *map;
>
> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>                         if (!map)
>                                 continue;
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 1/3] net: Refactor XPS for CPUs and Rx queues
  2018-05-09 20:31   ` Tom Herbert
@ 2018-05-09 20:54     ` Nambiar, Amritha
  2018-05-14 16:53       ` Tom Herbert
  0 siblings, 1 reply; 14+ messages in thread
From: Nambiar, Amritha @ 2018-05-09 20:54 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, David S. Miller,
	Alexander Duyck, Sridhar Samudrala, Eric Dumazet,
	Hannes Frederic Sowa

On 5/9/2018 1:31 PM, Tom Herbert wrote:
> On Thu, Apr 19, 2018 at 6:04 PM, Amritha Nambiar
> <amritha.nambiar@intel.com> wrote:
>> Refactor XPS code to support Tx queue selection based on
>> CPU map or Rx queue map.
>>
>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>> ---
>>  include/linux/netdevice.h |   82 +++++++++++++++++-
>>  net/core/dev.c            |  206 +++++++++++++++++++++++++++++----------------
>>  net/core/net-sysfs.c      |    4 -
>>  3 files changed, 216 insertions(+), 76 deletions(-)
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 14e0777..40a9171 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -730,10 +730,21 @@ struct xps_map {
>>   */
>>  struct xps_dev_maps {
>>         struct rcu_head rcu;
>> -       struct xps_map __rcu *cpu_map[0];
>> +       struct xps_map __rcu *attr_map[0];
> 
> This seems unnecessarily complicated to me. Why not just add another
> map called something like "rxq2txq_map". Then when selecting TXQ just
> check the new map first and then the normal cpu_map if there's not a
> hit.
> 

This is just a change in the name to something more generic ('attr')
since the maps can either be cpu based or rxq based. I have added two
map types, XPS_MAP_RXQS, XPS_MAP_CPUS and the TXQ selection (in patch
2/3) works as you described, first based on the RXQ map and if there
is no hit, falls back to the normal CPU map.

>>  };
>> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
>> +
>> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
>> +
>> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
>> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
>> +
>> +enum xps_map_type {
>> +       XPS_MAP_RXQS,
>> +       XPS_MAP_CPUS,
>> +       __XPS_MAP_MAX
>> +};
>> +
>>  #endif /* CONFIG_XPS */
>>
>>  #define TC_MAX_QUEUE   16
>> @@ -1867,7 +1878,7 @@ struct net_device {
>>         int                     watchdog_timeo;
>>
>>  #ifdef CONFIG_XPS
>> -       struct xps_dev_maps __rcu *xps_maps;
>> +       struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
>>  #endif
>>  #ifdef CONFIG_NET_CLS_ACT
>>         struct mini_Qdisc __rcu *miniq_egress;
>> @@ -3204,6 +3215,71 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>>  #ifdef CONFIG_XPS
>>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         u16 index);
>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>> +                         u16 index, enum xps_map_type type);
>> +
>> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
>> +                                 unsigned int nr_bits)
>> +{
>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>> +       WARN_ON_ONCE(j >= nr_bits);
>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> 
> This #ifdef block appears 3 times in the patch. Seems like it should
> be replace by simple macro.

Sure, will do in the next version.

> 
>> +       return test_bit(j, mask);
>> +}
>> +
>> +static inline bool attr_test_online(unsigned long j,
>> +                                   const unsigned long *online_mask,
>> +                                   unsigned int nr_bits)
>> +{
>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>> +       WARN_ON_ONCE(j >= nr_bits);
>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>> +
>> +       if (online_mask)
>> +               return test_bit(j, online_mask);
>> +
>> +       if (j >= 0 && j < nr_bits)
>> +               return true;
>> +
>> +       return false;
>> +}
>> +
>> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
>> +                                        unsigned int nr_bits)
>> +{
>> +       /* -1 is a legal arg here. */
>> +       if (n != -1) {
>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>> +               WARN_ON_ONCE(n >= nr_bits);
>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>> +       }
>> +
>> +       if (srcp)
>> +               return find_next_bit(srcp, nr_bits, n + 1);
>> +
>> +       return n + 1;
>> +}
>> +
>> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
>> +                                   const unsigned long *src2p,
>> +                                   unsigned int nr_bits)
>> +{
>> +       /* -1 is a legal arg here. */
>> +       if (n != -1) {
>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>> +               WARN_ON_ONCE(n >= nr_bits);
>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>> +       }
>> +
>> +       if (src1p && src2p)
>> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
>> +       else if (src1p)
>> +               return find_next_bit(src1p, nr_bits, n + 1);
>> +       else if (src2p)
>> +               return find_next_bit(src2p, nr_bits, n + 1);
>> +
>> +       return n + 1;
>> +}
>>  #else
>>  static inline int netif_set_xps_queue(struct net_device *dev,
>>                                       const struct cpumask *mask,
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index a490ef6..17c4883 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>         int pos;
>>
>>         if (dev_maps)
>> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
>> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>>         if (!map)
>>                 return false;
>>
>> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>                         break;
>>                 }
>>
>> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
>> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>>                 kfree_rcu(map, rcu);
>>                 return false;
>>         }
>> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>>                                    u16 count)
>>  {
>> +       const unsigned long *possible_mask = NULL;
>> +       enum xps_map_type type = XPS_MAP_RXQS;
>>         struct xps_dev_maps *dev_maps;
>> -       int cpu, i;
>>         bool active = false;
>> +       unsigned int nr_ids;
>> +       int i, j;
>>
>>         mutex_lock(&xps_map_mutex);
>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>
>> -       if (!dev_maps)
>> -               goto out_no_maps;
>> +       while (type < __XPS_MAP_MAX) {
>> +               dev_maps = xmap_dereference(dev->xps_maps[type]);
>> +               if (!dev_maps)
>> +                       goto out_no_maps;
>> +
>> +               if (type == XPS_MAP_CPUS) {
>> +                       if (num_possible_cpus() > 1)
>> +                               possible_mask = cpumask_bits(cpu_possible_mask);
>> +                       nr_ids = nr_cpu_ids;
>> +               } else if (type == XPS_MAP_RXQS) {
>> +                       nr_ids = dev->num_rx_queues;
>> +               }
>>
>> -       for_each_possible_cpu(cpu)
>> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
>> -                                              offset, count);
>> +               for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +                    j < nr_ids;)
>> +                       active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
>> +                                                      count);
>> +               if (!active) {
>> +                       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
>> +                       kfree_rcu(dev_maps, rcu);
>> +               }
>>
>> -       if (!active) {
>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>> -               kfree_rcu(dev_maps, rcu);
>> +               if (type == XPS_MAP_CPUS) {
>> +                       for (i = offset + (count - 1); count--; i--)
>> +                               netdev_queue_numa_node_write(
>> +                                       netdev_get_tx_queue(dev, i),
>> +                                                           NUMA_NO_NODE);
>> +               }
>> +out_no_maps:
>> +               type++;
>>         }
>>
>> -       for (i = offset + (count - 1); count--; i--)
>> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
>> -                                            NUMA_NO_NODE);
>> -
>> -out_no_maps:
>>         mutex_unlock(&xps_map_mutex);
>>  }
>>
>> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>>  }
>>
>> -static struct xps_map *expand_xps_map(struct xps_map *map,
>> -                                     int cpu, u16 index)
>> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
>> +                                     u16 index, enum xps_map_type type)
>>  {
>> -       struct xps_map *new_map;
>>         int alloc_len = XPS_MIN_MAP_ALLOC;
>> +       struct xps_map *new_map = NULL;
>>         int i, pos;
>>
>>         for (pos = 0; map && pos < map->len; pos++) {
>> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>                 return map;
>>         }
>>
>> -       /* Need to add queue to this CPU's existing map */
>> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>>         if (map) {
>>                 if (pos < map->alloc_len)
>>                         return map;
>> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>                 alloc_len = map->alloc_len * 2;
>>         }
>>
>> -       /* Need to allocate new map to store queue on this CPU's map */
>> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>> -                              cpu_to_node(cpu));
>> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
>> +        *  map
>> +        */
>> +       if (type == XPS_MAP_RXQS)
>> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
>> +       else if (type == XPS_MAP_CPUS)
>> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>> +                                      cpu_to_node(attr_index));
>>         if (!new_map)
>>                 return NULL;
>>
>> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>         return new_map;
>>  }
>>
>> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>> -                       u16 index)
>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>> +                         u16 index, enum xps_map_type type)
>>  {
>> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
>> -       int i, cpu, tci, numa_node_id = -2;
>> +       int i, j, tci, numa_node_id = -2;
>>         int maps_sz, num_tc = 1, tc = 0;
>>         struct xps_map *map, *new_map;
>>         bool active = false;
>> +       unsigned int nr_ids;
>>
>>         if (dev->num_tc) {
>>                 num_tc = dev->num_tc;
>> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         return -EINVAL;
>>         }
>>
>> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
>> +       switch (type) {
>> +       case XPS_MAP_RXQS:
>> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
>> +               nr_ids = dev->num_rx_queues;
>> +               break;
>> +       case XPS_MAP_CPUS:
>> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
>> +               if (num_possible_cpus() > 1) {
>> +                       online_mask = cpumask_bits(cpu_online_mask);
>> +                       possible_mask = cpumask_bits(cpu_possible_mask);
>> +               }
>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>> +               nr_ids = nr_cpu_ids;
>> +               break;
>> +       default:
>> +               return -EINVAL;
>> +       }
>> +
>>         if (maps_sz < L1_CACHE_BYTES)
>>                 maps_sz = L1_CACHE_BYTES;
>>
>>         mutex_lock(&xps_map_mutex);
>>
>> -       dev_maps = xmap_dereference(dev->xps_maps);
>> -
>>         /* allocate memory for queue storage */
>> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
>> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
>> +            j < nr_ids;) {
>>                 if (!new_dev_maps)
>>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>>                 if (!new_dev_maps) {
>> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         return -ENOMEM;
>>                 }
>>
>> -               tci = cpu * num_tc + tc;
>> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
>> +               tci = j * num_tc + tc;
>> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>>                                  NULL;
>>
>> -               map = expand_xps_map(map, cpu, index);
>> +               map = expand_xps_map(map, j, index, type);
>>                 if (!map)
>>                         goto error;
>>
>> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>         }
>>
>>         if (!new_dev_maps)
>>                 goto out_no_new_maps;
>>
>> -       for_each_possible_cpu(cpu) {
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>>                 /* copy maps belonging to foreign traffic classes */
>> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
>> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>
>>                 /* We need to explicitly update tci as prevous loop
>>                  * could break out early if dev_maps is NULL.
>>                  */
>> -               tci = cpu * num_tc + tc;
>> +               tci = j * num_tc + tc;
>>
>> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
>> -                       /* add queue to CPU maps */
>> +               if (attr_test_mask(j, mask, nr_ids) &&
>> +                   attr_test_online(j, online_mask, nr_ids)) {
>> +                       /* add tx-queue to CPU/rx-queue maps */
>>                         int pos = 0;
>>
>> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>                         while ((pos < map->len) && (map->queues[pos] != index))
>>                                 pos++;
>>
>>                         if (pos == map->len)
>>                                 map->queues[map->len++] = index;
>>  #ifdef CONFIG_NUMA
>> -                       if (numa_node_id == -2)
>> -                               numa_node_id = cpu_to_node(cpu);
>> -                       else if (numa_node_id != cpu_to_node(cpu))
>> -                               numa_node_id = -1;
>> +                       if (type == XPS_MAP_CPUS) {
>> +                               if (numa_node_id == -2)
>> +                                       numa_node_id = cpu_to_node(j);
>> +                               else if (numa_node_id != cpu_to_node(j))
>> +                                       numa_node_id = -1;
>> +                       }
>>  #endif
>>                 } else if (dev_maps) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>
>>                 /* copy maps belonging to foreign traffic classes */
>>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>         }
>>
>> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
>> +       if (type == XPS_MAP_RXQS)
>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
>> +       else if (type == XPS_MAP_CPUS)
>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>>
>>         /* Cleanup old maps */
>>         if (!dev_maps)
>>                 goto out_no_old_maps;
>>
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>                         if (map && map != new_map)
>>                                 kfree_rcu(map, rcu);
>>                 }
>> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         active = true;
>>
>>  out_no_new_maps:
>> -       /* update Tx queue numa node */
>> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>> -                                    (numa_node_id >= 0) ? numa_node_id :
>> -                                    NUMA_NO_NODE);
>> +       if (type == XPS_MAP_CPUS) {
>> +               /* update Tx queue numa node */
>> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>> +                                            (numa_node_id >= 0) ?
>> +                                            numa_node_id : NUMA_NO_NODE);
>> +       }
>>
>>         if (!dev_maps)
>>                 goto out_no_maps;
>>
>> -       /* removes queue from unused CPUs */
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
>> +       /* removes tx-queue from unused CPUs/rx-queues */
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = tc, tci = j * num_tc; i--; tci++)
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
>> +               if (!attr_test_mask(j, mask, nr_ids) ||
>> +                   !attr_test_online(j, online_mask, nr_ids))
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>                 for (i = num_tc - tc, tci++; --i; tci++)
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>
>>         /* free map if not active */
>>         if (!active) {
>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>> +               if (type == XPS_MAP_RXQS)
>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
>> +               else if (type == XPS_MAP_CPUS)
>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
>>                 kfree_rcu(dev_maps, rcu);
>>         }
>>
>> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         return 0;
>>  error:
>>         /* remove any maps that we added */
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>                         map = dev_maps ?
>> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
>> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>>                               NULL;
>>                         if (new_map && new_map != map)
>>                                 kfree(new_map);
>> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         kfree(new_dev_maps);
>>         return -ENOMEM;
>>  }
>> +
>> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>> +                       u16 index)
>> +{
>> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
>> +                                    XPS_MAP_CPUS);
>> +}
>>  EXPORT_SYMBOL(netif_set_xps_queue);
>>
>>  #endif
>> @@ -3400,7 +3464,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>         int queue_index = -1;
>>
>>         rcu_read_lock();
>> -       dev_maps = rcu_dereference(dev->xps_maps);
>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>         if (dev_maps) {
>>                 unsigned int tci = skb->sender_cpu - 1;
>>
>> @@ -3409,7 +3473,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>>                 }
>>
>> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
>> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>>                 if (map) {
>>                         if (map->len == 1)
>>                                 queue_index = map->queues[0];
>> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
>> index c476f07..d7abd33 100644
>> --- a/net/core/net-sysfs.c
>> +++ b/net/core/net-sysfs.c
>> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>>         }
>>
>>         rcu_read_lock();
>> -       dev_maps = rcu_dereference(dev->xps_maps);
>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>         if (dev_maps) {
>>                 for_each_possible_cpu(cpu) {
>>                         int i, tci = cpu * num_tc + tc;
>>                         struct xps_map *map;
>>
>> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
>> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>>                         if (!map)
>>                                 continue;
>>
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 1/3] net: Refactor XPS for CPUs and Rx queues
  2018-05-09 20:54     ` Nambiar, Amritha
@ 2018-05-14 16:53       ` Tom Herbert
  2018-05-14 17:18         ` Nambiar, Amritha
  0 siblings, 1 reply; 14+ messages in thread
From: Tom Herbert @ 2018-05-14 16:53 UTC (permalink / raw)
  To: Nambiar, Amritha
  Cc: Linux Kernel Network Developers, David S. Miller,
	Alexander Duyck, Sridhar Samudrala, Eric Dumazet,
	Hannes Frederic Sowa

On Wed, May 9, 2018 at 1:54 PM, Nambiar, Amritha
<amritha.nambiar@intel.com> wrote:
> On 5/9/2018 1:31 PM, Tom Herbert wrote:
>> On Thu, Apr 19, 2018 at 6:04 PM, Amritha Nambiar
>> <amritha.nambiar@intel.com> wrote:
>>> Refactor XPS code to support Tx queue selection based on
>>> CPU map or Rx queue map.
>>>
>>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>>> ---
>>>  include/linux/netdevice.h |   82 +++++++++++++++++-
>>>  net/core/dev.c            |  206 +++++++++++++++++++++++++++++----------------
>>>  net/core/net-sysfs.c      |    4 -
>>>  3 files changed, 216 insertions(+), 76 deletions(-)
>>>
>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>> index 14e0777..40a9171 100644
>>> --- a/include/linux/netdevice.h
>>> +++ b/include/linux/netdevice.h
>>> @@ -730,10 +730,21 @@ struct xps_map {
>>>   */
>>>  struct xps_dev_maps {
>>>         struct rcu_head rcu;
>>> -       struct xps_map __rcu *cpu_map[0];
>>> +       struct xps_map __rcu *attr_map[0];
>>
>> This seems unnecessarily complicated to me. Why not just add another
>> map called something like "rxq2txq_map". Then when selecting TXQ just
>> check the new map first and then the normal cpu_map if there's not a
>> hit.
>>
>
> This is just a change in the name to something more generic ('attr')
> since the maps can either be cpu based or rxq based. I have added two
> map types, XPS_MAP_RXQS, XPS_MAP_CPUS and the TXQ selection (in patch

I think adding map types is overkill and we really don't want to turn
this in to a generic but complex interface with a bunch of map types.
Just have two pointers to the two different maps.

> 2/3) works how you described, first based on the RXQ map and if there
> is no hit, falls back to the normal CPU map.
>
>>>  };
>>> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
>>> +
>>> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>>>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
>>> +
>>> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
>>> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
>>> +
>>> +enum xps_map_type {
>>> +       XPS_MAP_RXQS,
>>> +       XPS_MAP_CPUS,
>>> +       __XPS_MAP_MAX
>>> +};
>>> +
>>>  #endif /* CONFIG_XPS */
>>>
>>>  #define TC_MAX_QUEUE   16
>>> @@ -1867,7 +1878,7 @@ struct net_device {
>>>         int                     watchdog_timeo;
>>>
>>>  #ifdef CONFIG_XPS
>>> -       struct xps_dev_maps __rcu *xps_maps;
>>> +       struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
>>>  #endif
>>>  #ifdef CONFIG_NET_CLS_ACT
>>>         struct mini_Qdisc __rcu *miniq_egress;
>>> @@ -3204,6 +3215,71 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>>>  #ifdef CONFIG_XPS
>>>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>                         u16 index);
>>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>>> +                         u16 index, enum xps_map_type type);
>>> +
>>> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
>>> +                                 unsigned int nr_bits)
>>> +{
>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>> +       WARN_ON_ONCE(j >= nr_bits);
>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>
>> This #ifdef block appears 3 times in the patch. Seems like it should
>> be replace by simple macro.
>
> Sure, will do in the next version.
>
>>
>>> +       return test_bit(j, mask);
>>> +}
>>> +
>>> +static inline bool attr_test_online(unsigned long j,
>>> +                                   const unsigned long *online_mask,
>>> +                                   unsigned int nr_bits)
>>> +{
>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>> +       WARN_ON_ONCE(j >= nr_bits);
>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>> +
>>> +       if (online_mask)
>>> +               return test_bit(j, online_mask);
>>> +
>>> +       if (j >= 0 && j < nr_bits)
>>> +               return true;
>>> +
>>> +       return false;
>>> +}
>>> +
>>> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
>>> +                                        unsigned int nr_bits)
>>> +{
>>> +       /* -1 is a legal arg here. */
>>> +       if (n != -1) {
>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>> +               WARN_ON_ONCE(n >= nr_bits);
>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>> +       }
>>> +
>>> +       if (srcp)
>>> +               return find_next_bit(srcp, nr_bits, n + 1);
>>> +
>>> +       return n + 1;
>>> +}
>>> +
>>> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
>>> +                                   const unsigned long *src2p,
>>> +                                   unsigned int nr_bits)
>>> +{
>>> +       /* -1 is a legal arg here. */
>>> +       if (n != -1) {
>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>> +               WARN_ON_ONCE(n >= nr_bits);
>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>> +       }
>>> +
>>> +       if (src1p && src2p)
>>> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
>>> +       else if (src1p)
>>> +               return find_next_bit(src1p, nr_bits, n + 1);
>>> +       else if (src2p)
>>> +               return find_next_bit(src2p, nr_bits, n + 1);
>>> +
>>> +       return n + 1;
>>> +}
>>>  #else
>>>  static inline int netif_set_xps_queue(struct net_device *dev,
>>>                                       const struct cpumask *mask,
>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>> index a490ef6..17c4883 100644
>>> --- a/net/core/dev.c
>>> +++ b/net/core/dev.c
>>> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>>         int pos;
>>>
>>>         if (dev_maps)
>>> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
>>> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>>>         if (!map)
>>>                 return false;
>>>
>>> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>>                         break;
>>>                 }
>>>
>>> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
>>> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>>>                 kfree_rcu(map, rcu);
>>>                 return false;
>>>         }
>>> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>>>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>>>                                    u16 count)
>>>  {
>>> +       const unsigned long *possible_mask = NULL;
>>> +       enum xps_map_type type = XPS_MAP_RXQS;
>>>         struct xps_dev_maps *dev_maps;
>>> -       int cpu, i;
>>>         bool active = false;
>>> +       unsigned int nr_ids;
>>> +       int i, j;
>>>
>>>         mutex_lock(&xps_map_mutex);
>>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>>
>>> -       if (!dev_maps)
>>> -               goto out_no_maps;
>>> +       while (type < __XPS_MAP_MAX) {
>>> +               dev_maps = xmap_dereference(dev->xps_maps[type]);
>>> +               if (!dev_maps)
>>> +                       goto out_no_maps;
>>> +
>>> +               if (type == XPS_MAP_CPUS) {
>>> +                       if (num_possible_cpus() > 1)
>>> +                               possible_mask = cpumask_bits(cpu_possible_mask);
>>> +                       nr_ids = nr_cpu_ids;
>>> +               } else if (type == XPS_MAP_RXQS) {
>>> +                       nr_ids = dev->num_rx_queues;
>>> +               }
>>>
>>> -       for_each_possible_cpu(cpu)
>>> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
>>> -                                              offset, count);
>>> +               for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>> +                    j < nr_ids;)
>>> +                       active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
>>> +                                                      count);
>>> +               if (!active) {
>>> +                       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
>>> +                       kfree_rcu(dev_maps, rcu);
>>> +               }
>>>
>>> -       if (!active) {
>>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>>> -               kfree_rcu(dev_maps, rcu);
>>> +               if (type == XPS_MAP_CPUS) {
>>> +                       for (i = offset + (count - 1); count--; i--)
>>> +                               netdev_queue_numa_node_write(
>>> +                                       netdev_get_tx_queue(dev, i),
>>> +                                                           NUMA_NO_NODE);
>>> +               }
>>> +out_no_maps:
>>> +               type++;
>>>         }
>>>
>>> -       for (i = offset + (count - 1); count--; i--)
>>> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
>>> -                                            NUMA_NO_NODE);
>>> -
>>> -out_no_maps:
>>>         mutex_unlock(&xps_map_mutex);
>>>  }
>>>
>>> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>>>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>>>  }
>>>
>>> -static struct xps_map *expand_xps_map(struct xps_map *map,
>>> -                                     int cpu, u16 index)
>>> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
>>> +                                     u16 index, enum xps_map_type type)
>>>  {
>>> -       struct xps_map *new_map;
>>>         int alloc_len = XPS_MIN_MAP_ALLOC;
>>> +       struct xps_map *new_map = NULL;
>>>         int i, pos;
>>>
>>>         for (pos = 0; map && pos < map->len; pos++) {
>>> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>                 return map;
>>>         }
>>>
>>> -       /* Need to add queue to this CPU's existing map */
>>> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>>>         if (map) {
>>>                 if (pos < map->alloc_len)
>>>                         return map;
>>> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>                 alloc_len = map->alloc_len * 2;
>>>         }
>>>
>>> -       /* Need to allocate new map to store queue on this CPU's map */
>>> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>>> -                              cpu_to_node(cpu));
>>> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
>>> +        *  map
>>> +        */
>>> +       if (type == XPS_MAP_RXQS)
>>> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
>>> +       else if (type == XPS_MAP_CPUS)
>>> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>>> +                                      cpu_to_node(attr_index));
>>>         if (!new_map)
>>>                 return NULL;
>>>
>>> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>         return new_map;
>>>  }
>>>
>>> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>> -                       u16 index)
>>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>>> +                         u16 index, enum xps_map_type type)
>>>  {
>>> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>>>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
>>> -       int i, cpu, tci, numa_node_id = -2;
>>> +       int i, j, tci, numa_node_id = -2;
>>>         int maps_sz, num_tc = 1, tc = 0;
>>>         struct xps_map *map, *new_map;
>>>         bool active = false;
>>> +       unsigned int nr_ids;
>>>
>>>         if (dev->num_tc) {
>>>                 num_tc = dev->num_tc;
>>> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>                         return -EINVAL;
>>>         }
>>>
>>> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
>>> +       switch (type) {
>>> +       case XPS_MAP_RXQS:
>>> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
>>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
>>> +               nr_ids = dev->num_rx_queues;
>>> +               break;
>>> +       case XPS_MAP_CPUS:
>>> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
>>> +               if (num_possible_cpus() > 1) {
>>> +                       online_mask = cpumask_bits(cpu_online_mask);
>>> +                       possible_mask = cpumask_bits(cpu_possible_mask);
>>> +               }
>>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>> +               nr_ids = nr_cpu_ids;
>>> +               break;
>>> +       default:
>>> +               return -EINVAL;
>>> +       }
>>> +
>>>         if (maps_sz < L1_CACHE_BYTES)
>>>                 maps_sz = L1_CACHE_BYTES;
>>>
>>>         mutex_lock(&xps_map_mutex);
>>>
>>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>> -
>>>         /* allocate memory for queue storage */
>>> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
>>> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
>>> +            j < nr_ids;) {
>>>                 if (!new_dev_maps)
>>>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>>>                 if (!new_dev_maps) {
>>> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>                         return -ENOMEM;
>>>                 }
>>>
>>> -               tci = cpu * num_tc + tc;
>>> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
>>> +               tci = j * num_tc + tc;
>>> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>>>                                  NULL;
>>>
>>> -               map = expand_xps_map(map, cpu, index);
>>> +               map = expand_xps_map(map, j, index, type);
>>>                 if (!map)
>>>                         goto error;
>>>
>>> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>         }
>>>
>>>         if (!new_dev_maps)
>>>                 goto out_no_new_maps;
>>>
>>> -       for_each_possible_cpu(cpu) {
>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>> +            j < nr_ids;) {
>>>                 /* copy maps belonging to foreign traffic classes */
>>> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
>>> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>>>                         /* fill in the new device map from the old device map */
>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>                 }
>>>
>>>                 /* We need to explicitly update tci as prevous loop
>>>                  * could break out early if dev_maps is NULL.
>>>                  */
>>> -               tci = cpu * num_tc + tc;
>>> +               tci = j * num_tc + tc;
>>>
>>> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
>>> -                       /* add queue to CPU maps */
>>> +               if (attr_test_mask(j, mask, nr_ids) &&
>>> +                   attr_test_online(j, online_mask, nr_ids)) {
>>> +                       /* add tx-queue to CPU/rx-queue maps */
>>>                         int pos = 0;
>>>
>>> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>>                         while ((pos < map->len) && (map->queues[pos] != index))
>>>                                 pos++;
>>>
>>>                         if (pos == map->len)
>>>                                 map->queues[map->len++] = index;
>>>  #ifdef CONFIG_NUMA
>>> -                       if (numa_node_id == -2)
>>> -                               numa_node_id = cpu_to_node(cpu);
>>> -                       else if (numa_node_id != cpu_to_node(cpu))
>>> -                               numa_node_id = -1;
>>> +                       if (type == XPS_MAP_CPUS) {
>>> +                               if (numa_node_id == -2)
>>> +                                       numa_node_id = cpu_to_node(j);
>>> +                               else if (numa_node_id != cpu_to_node(j))
>>> +                                       numa_node_id = -1;
>>> +                       }
>>>  #endif
>>>                 } else if (dev_maps) {
>>>                         /* fill in the new device map from the old device map */
>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>                 }
>>>
>>>                 /* copy maps belonging to foreign traffic classes */
>>>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>>>                         /* fill in the new device map from the old device map */
>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>                 }
>>>         }
>>>
>>> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
>>> +       if (type == XPS_MAP_RXQS)
>>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
>>> +       else if (type == XPS_MAP_CPUS)
>>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>>>
>>>         /* Cleanup old maps */
>>>         if (!dev_maps)
>>>                 goto out_no_old_maps;
>>>
>>> -       for_each_possible_cpu(cpu) {
>>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>> +            j < nr_ids;) {
>>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>>                         if (map && map != new_map)
>>>                                 kfree_rcu(map, rcu);
>>>                 }
>>> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>         active = true;
>>>
>>>  out_no_new_maps:
>>> -       /* update Tx queue numa node */
>>> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>>> -                                    (numa_node_id >= 0) ? numa_node_id :
>>> -                                    NUMA_NO_NODE);
>>> +       if (type == XPS_MAP_CPUS) {
>>> +               /* update Tx queue numa node */
>>> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>>> +                                            (numa_node_id >= 0) ?
>>> +                                            numa_node_id : NUMA_NO_NODE);
>>> +       }
>>>
>>>         if (!dev_maps)
>>>                 goto out_no_maps;
>>>
>>> -       /* removes queue from unused CPUs */
>>> -       for_each_possible_cpu(cpu) {
>>> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
>>> +       /* removes tx-queue from unused CPUs/rx-queues */
>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>> +            j < nr_ids;) {
>>> +               for (i = tc, tci = j * num_tc; i--; tci++)
>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
>>> +               if (!attr_test_mask(j, mask, nr_ids) ||
>>> +                   !attr_test_online(j, online_mask, nr_ids))
>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>>                 for (i = num_tc - tc, tci++; --i; tci++)
>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>
>>>         /* free map if not active */
>>>         if (!active) {
>>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>>> +               if (type == XPS_MAP_RXQS)
>>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
>>> +               else if (type == XPS_MAP_CPUS)
>>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
>>>                 kfree_rcu(dev_maps, rcu);
>>>         }
>>>
>>> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>         return 0;
>>>  error:
>>>         /* remove any maps that we added */
>>> -       for_each_possible_cpu(cpu) {
>>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>> +            j < nr_ids;) {
>>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>>                         map = dev_maps ?
>>> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
>>> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>>>                               NULL;
>>>                         if (new_map && new_map != map)
>>>                                 kfree(new_map);
>>> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>         kfree(new_dev_maps);
>>>         return -ENOMEM;
>>>  }
>>> +
>>> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>> +                       u16 index)
>>> +{
>>> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
>>> +                                    XPS_MAP_CPUS);
>>> +}
>>>  EXPORT_SYMBOL(netif_set_xps_queue);
>>>
>>>  #endif
>>> @@ -3400,7 +3464,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>>         int queue_index = -1;
>>>
>>>         rcu_read_lock();
>>> -       dev_maps = rcu_dereference(dev->xps_maps);
>>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>>         if (dev_maps) {
>>>                 unsigned int tci = skb->sender_cpu - 1;
>>>
>>> @@ -3409,7 +3473,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>>>                 }
>>>
>>> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
>>> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>>>                 if (map) {
>>>                         if (map->len == 1)
>>>                                 queue_index = map->queues[0];
>>> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
>>> index c476f07..d7abd33 100644
>>> --- a/net/core/net-sysfs.c
>>> +++ b/net/core/net-sysfs.c
>>> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>>>         }
>>>
>>>         rcu_read_lock();
>>> -       dev_maps = rcu_dereference(dev->xps_maps);
>>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>>         if (dev_maps) {
>>>                 for_each_possible_cpu(cpu) {
>>>                         int i, tci = cpu * num_tc + tc;
>>>                         struct xps_map *map;
>>>
>>> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
>>> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>>>                         if (!map)
>>>                                 continue;
>>>
>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 1/3] net: Refactor XPS for CPUs and Rx queues
  2018-05-14 16:53       ` Tom Herbert
@ 2018-05-14 17:18         ` Nambiar, Amritha
  0 siblings, 0 replies; 14+ messages in thread
From: Nambiar, Amritha @ 2018-05-14 17:18 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, David S. Miller,
	Alexander Duyck, Sridhar Samudrala, Eric Dumazet,
	Hannes Frederic Sowa

On 5/14/2018 9:53 AM, Tom Herbert wrote:
> On Wed, May 9, 2018 at 1:54 PM, Nambiar, Amritha
> <amritha.nambiar@intel.com> wrote:
>> On 5/9/2018 1:31 PM, Tom Herbert wrote:
>>> On Thu, Apr 19, 2018 at 6:04 PM, Amritha Nambiar
>>> <amritha.nambiar@intel.com> wrote:
>>>> Refactor XPS code to support Tx queue selection based on
>>>> CPU map or Rx queue map.
>>>>
>>>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>>>> ---
>>>>  include/linux/netdevice.h |   82 +++++++++++++++++-
>>>>  net/core/dev.c            |  206 +++++++++++++++++++++++++++++----------------
>>>>  net/core/net-sysfs.c      |    4 -
>>>>  3 files changed, 216 insertions(+), 76 deletions(-)
>>>>
>>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>> index 14e0777..40a9171 100644
>>>> --- a/include/linux/netdevice.h
>>>> +++ b/include/linux/netdevice.h
>>>> @@ -730,10 +730,21 @@ struct xps_map {
>>>>   */
>>>>  struct xps_dev_maps {
>>>>         struct rcu_head rcu;
>>>> -       struct xps_map __rcu *cpu_map[0];
>>>> +       struct xps_map __rcu *attr_map[0];
>>>
>>> This seems unnecessarily complicated to me. Why not just add another
>>> map called something like "rxq2txq_map". Then when selecting TXQ just
>>> check the new map first and then the normal cpu_map if there's not a
>>> hit.
>>>
>>
>> This is just a change in the name to something more generic ('attr')
>> since the maps can either be cpu based or rxq based. I have added two
>> map types, XPS_MAP_RXQS, XPS_MAP_CPUS and the TXQ selection (in patch
> 
> I think adding map types is overkill and we really don't want to turn
> this in to a generic but complex interface with a bunch of map types.
> Just have two pointers to the two different maps.
> 

Sorry if I wasn't clear enough. Instead of having two different pointers:
struct xps_dev_maps __rcu *xps_cpu_maps;
struct xps_dev_maps __rcu *xps_rxq_maps;
in the struct netdevice below, I have a pointer array:
struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
and use the map type enum as an index to select which one we are updating.

The idea was that this might look cleaner when selecting the queue, since the
processing order would be XPS_MAP_RXQS first and then XPS_MAP_CPUS.

>> 2/3) works how you described,  first based on the RXQ map and if there
>> is no hit, falls to the normal CPU map.
>>
>>>>  };
>>>> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
>>>> +
>>>> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>>>>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
>>>> +
>>>> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
>>>> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
>>>> +
>>>> +enum xps_map_type {
>>>> +       XPS_MAP_RXQS,
>>>> +       XPS_MAP_CPUS,
>>>> +       __XPS_MAP_MAX
>>>> +};
>>>> +
>>>>  #endif /* CONFIG_XPS */
>>>>
>>>>  #define TC_MAX_QUEUE   16
>>>> @@ -1867,7 +1878,7 @@ struct net_device {
>>>>         int                     watchdog_timeo;
>>>>
>>>>  #ifdef CONFIG_XPS
>>>> -       struct xps_dev_maps __rcu *xps_maps;
>>>> +       struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
>>>>  #endif
>>>>  #ifdef CONFIG_NET_CLS_ACT
>>>>         struct mini_Qdisc __rcu *miniq_egress;
>>>> @@ -3204,6 +3215,71 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>>>>  #ifdef CONFIG_XPS
>>>>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>                         u16 index);
>>>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>>>> +                         u16 index, enum xps_map_type type);
>>>> +
>>>> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
>>>> +                                 unsigned int nr_bits)
>>>> +{
>>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>>> +       WARN_ON_ONCE(j >= nr_bits);
>>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>>
>>> This #ifdef block appears 3 times in the patch. Seems like it should
>>> be replace by simple macro.
>>
>> Sure, will do in the next version.
>>
>>>
>>>> +       return test_bit(j, mask);
>>>> +}
>>>> +
>>>> +static inline bool attr_test_online(unsigned long j,
>>>> +                                   const unsigned long *online_mask,
>>>> +                                   unsigned int nr_bits)
>>>> +{
>>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>>> +       WARN_ON_ONCE(j >= nr_bits);
>>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>>> +
>>>> +       if (online_mask)
>>>> +               return test_bit(j, online_mask);
>>>> +
>>>> +       if (j >= 0 && j < nr_bits)
>>>> +               return true;
>>>> +
>>>> +       return false;
>>>> +}
>>>> +
>>>> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
>>>> +                                        unsigned int nr_bits)
>>>> +{
>>>> +       /* -1 is a legal arg here. */
>>>> +       if (n != -1) {
>>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>>> +               WARN_ON_ONCE(n >= nr_bits);
>>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>>> +       }
>>>> +
>>>> +       if (srcp)
>>>> +               return find_next_bit(srcp, nr_bits, n + 1);
>>>> +
>>>> +       return n + 1;
>>>> +}
>>>> +
>>>> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
>>>> +                                   const unsigned long *src2p,
>>>> +                                   unsigned int nr_bits)
>>>> +{
>>>> +       /* -1 is a legal arg here. */
>>>> +       if (n != -1) {
>>>> +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
>>>> +               WARN_ON_ONCE(n >= nr_bits);
>>>> +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>>>> +       }
>>>> +
>>>> +       if (src1p && src2p)
>>>> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
>>>> +       else if (src1p)
>>>> +               return find_next_bit(src1p, nr_bits, n + 1);
>>>> +       else if (src2p)
>>>> +               return find_next_bit(src2p, nr_bits, n + 1);
>>>> +
>>>> +       return n + 1;
>>>> +}
>>>>  #else
>>>>  static inline int netif_set_xps_queue(struct net_device *dev,
>>>>                                       const struct cpumask *mask,
>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>> index a490ef6..17c4883 100644
>>>> --- a/net/core/dev.c
>>>> +++ b/net/core/dev.c
>>>> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>>>         int pos;
>>>>
>>>>         if (dev_maps)
>>>> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
>>>> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>>>>         if (!map)
>>>>                 return false;
>>>>
>>>> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>>>                         break;
>>>>                 }
>>>>
>>>> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
>>>> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>>>>                 kfree_rcu(map, rcu);
>>>>                 return false;
>>>>         }
>>>> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>>>>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>>>>                                    u16 count)
>>>>  {
>>>> +       const unsigned long *possible_mask = NULL;
>>>> +       enum xps_map_type type = XPS_MAP_RXQS;
>>>>         struct xps_dev_maps *dev_maps;
>>>> -       int cpu, i;
>>>>         bool active = false;
>>>> +       unsigned int nr_ids;
>>>> +       int i, j;
>>>>
>>>>         mutex_lock(&xps_map_mutex);
>>>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>>>
>>>> -       if (!dev_maps)
>>>> -               goto out_no_maps;
>>>> +       while (type < __XPS_MAP_MAX) {
>>>> +               dev_maps = xmap_dereference(dev->xps_maps[type]);
>>>> +               if (!dev_maps)
>>>> +                       goto out_no_maps;
>>>> +
>>>> +               if (type == XPS_MAP_CPUS) {
>>>> +                       if (num_possible_cpus() > 1)
>>>> +                               possible_mask = cpumask_bits(cpu_possible_mask);
>>>> +                       nr_ids = nr_cpu_ids;
>>>> +               } else if (type == XPS_MAP_RXQS) {
>>>> +                       nr_ids = dev->num_rx_queues;
>>>> +               }
>>>>
>>>> -       for_each_possible_cpu(cpu)
>>>> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
>>>> -                                              offset, count);
>>>> +               for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>>> +                    j < nr_ids;)
>>>> +                       active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
>>>> +                                                      count);
>>>> +               if (!active) {
>>>> +                       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
>>>> +                       kfree_rcu(dev_maps, rcu);
>>>> +               }
>>>>
>>>> -       if (!active) {
>>>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>>>> -               kfree_rcu(dev_maps, rcu);
>>>> +               if (type == XPS_MAP_CPUS) {
>>>> +                       for (i = offset + (count - 1); count--; i--)
>>>> +                               netdev_queue_numa_node_write(
>>>> +                                       netdev_get_tx_queue(dev, i),
>>>> +                                                           NUMA_NO_NODE);
>>>> +               }
>>>> +out_no_maps:
>>>> +               type++;
>>>>         }
>>>>
>>>> -       for (i = offset + (count - 1); count--; i--)
>>>> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
>>>> -                                            NUMA_NO_NODE);
>>>> -
>>>> -out_no_maps:
>>>>         mutex_unlock(&xps_map_mutex);
>>>>  }
>>>>
>>>> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>>>>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>>>>  }
>>>>
>>>> -static struct xps_map *expand_xps_map(struct xps_map *map,
>>>> -                                     int cpu, u16 index)
>>>> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
>>>> +                                     u16 index, enum xps_map_type type)
>>>>  {
>>>> -       struct xps_map *new_map;
>>>>         int alloc_len = XPS_MIN_MAP_ALLOC;
>>>> +       struct xps_map *new_map = NULL;
>>>>         int i, pos;
>>>>
>>>>         for (pos = 0; map && pos < map->len; pos++) {
>>>> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>>                 return map;
>>>>         }
>>>>
>>>> -       /* Need to add queue to this CPU's existing map */
>>>> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>>>>         if (map) {
>>>>                 if (pos < map->alloc_len)
>>>>                         return map;
>>>> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>>                 alloc_len = map->alloc_len * 2;
>>>>         }
>>>>
>>>> -       /* Need to allocate new map to store queue on this CPU's map */
>>>> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>>>> -                              cpu_to_node(cpu));
>>>> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
>>>> +        *  map
>>>> +        */
>>>> +       if (type == XPS_MAP_RXQS)
>>>> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
>>>> +       else if (type == XPS_MAP_CPUS)
>>>> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>>>> +                                      cpu_to_node(attr_index));
>>>>         if (!new_map)
>>>>                 return NULL;
>>>>
>>>> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>>>         return new_map;
>>>>  }
>>>>
>>>> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>> -                       u16 index)
>>>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>>>> +                         u16 index, enum xps_map_type type)
>>>>  {
>>>> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>>>>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
>>>> -       int i, cpu, tci, numa_node_id = -2;
>>>> +       int i, j, tci, numa_node_id = -2;
>>>>         int maps_sz, num_tc = 1, tc = 0;
>>>>         struct xps_map *map, *new_map;
>>>>         bool active = false;
>>>> +       unsigned int nr_ids;
>>>>
>>>>         if (dev->num_tc) {
>>>>                 num_tc = dev->num_tc;
>>>> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>                         return -EINVAL;
>>>>         }
>>>>
>>>> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
>>>> +       switch (type) {
>>>> +       case XPS_MAP_RXQS:
>>>> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
>>>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
>>>> +               nr_ids = dev->num_rx_queues;
>>>> +               break;
>>>> +       case XPS_MAP_CPUS:
>>>> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
>>>> +               if (num_possible_cpus() > 1) {
>>>> +                       online_mask = cpumask_bits(cpu_online_mask);
>>>> +                       possible_mask = cpumask_bits(cpu_possible_mask);
>>>> +               }
>>>> +               dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>>> +               nr_ids = nr_cpu_ids;
>>>> +               break;
>>>> +       default:
>>>> +               return -EINVAL;
>>>> +       }
>>>> +
>>>>         if (maps_sz < L1_CACHE_BYTES)
>>>>                 maps_sz = L1_CACHE_BYTES;
>>>>
>>>>         mutex_lock(&xps_map_mutex);
>>>>
>>>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>>> -
>>>>         /* allocate memory for queue storage */
>>>> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
>>>> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
>>>> +            j < nr_ids;) {
>>>>                 if (!new_dev_maps)
>>>>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>>>>                 if (!new_dev_maps) {
>>>> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>                         return -ENOMEM;
>>>>                 }
>>>>
>>>> -               tci = cpu * num_tc + tc;
>>>> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
>>>> +               tci = j * num_tc + tc;
>>>> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>>>>                                  NULL;
>>>>
>>>> -               map = expand_xps_map(map, cpu, index);
>>>> +               map = expand_xps_map(map, j, index, type);
>>>>                 if (!map)
>>>>                         goto error;
>>>>
>>>> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>>> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>>         }
>>>>
>>>>         if (!new_dev_maps)
>>>>                 goto out_no_new_maps;
>>>>
>>>> -       for_each_possible_cpu(cpu) {
>>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>>> +            j < nr_ids;) {
>>>>                 /* copy maps belonging to foreign traffic classes */
>>>> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
>>>> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>>>>                         /* fill in the new device map from the old device map */
>>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>>                 }
>>>>
>>>>                 /* We need to explicitly update tci as prevous loop
>>>>                  * could break out early if dev_maps is NULL.
>>>>                  */
>>>> -               tci = cpu * num_tc + tc;
>>>> +               tci = j * num_tc + tc;
>>>>
>>>> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
>>>> -                       /* add queue to CPU maps */
>>>> +               if (attr_test_mask(j, mask, nr_ids) &&
>>>> +                   attr_test_online(j, online_mask, nr_ids)) {
>>>> +                       /* add tx-queue to CPU/rx-queue maps */
>>>>                         int pos = 0;
>>>>
>>>> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>>> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>>>                         while ((pos < map->len) && (map->queues[pos] != index))
>>>>                                 pos++;
>>>>
>>>>                         if (pos == map->len)
>>>>                                 map->queues[map->len++] = index;
>>>>  #ifdef CONFIG_NUMA
>>>> -                       if (numa_node_id == -2)
>>>> -                               numa_node_id = cpu_to_node(cpu);
>>>> -                       else if (numa_node_id != cpu_to_node(cpu))
>>>> -                               numa_node_id = -1;
>>>> +                       if (type == XPS_MAP_CPUS) {
>>>> +                               if (numa_node_id == -2)
>>>> +                                       numa_node_id = cpu_to_node(j);
>>>> +                               else if (numa_node_id != cpu_to_node(j))
>>>> +                                       numa_node_id = -1;
>>>> +                       }
>>>>  #endif
>>>>                 } else if (dev_maps) {
>>>>                         /* fill in the new device map from the old device map */
>>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>>                 }
>>>>
>>>>                 /* copy maps belonging to foreign traffic classes */
>>>>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>>>>                         /* fill in the new device map from the old device map */
>>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>>>                 }
>>>>         }
>>>>
>>>> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
>>>> +       if (type == XPS_MAP_RXQS)
>>>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
>>>> +       else if (type == XPS_MAP_CPUS)
>>>> +               rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>>>>
>>>>         /* Cleanup old maps */
>>>>         if (!dev_maps)
>>>>                 goto out_no_old_maps;
>>>>
>>>> -       for_each_possible_cpu(cpu) {
>>>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>>>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>>> +            j < nr_ids;) {
>>>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>>>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>>>                         if (map && map != new_map)
>>>>                                 kfree_rcu(map, rcu);
>>>>                 }
>>>> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>         active = true;
>>>>
>>>>  out_no_new_maps:
>>>> -       /* update Tx queue numa node */
>>>> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>>>> -                                    (numa_node_id >= 0) ? numa_node_id :
>>>> -                                    NUMA_NO_NODE);
>>>> +       if (type == XPS_MAP_CPUS) {
>>>> +               /* update Tx queue numa node */
>>>> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>>>> +                                            (numa_node_id >= 0) ?
>>>> +                                            numa_node_id : NUMA_NO_NODE);
>>>> +       }
>>>>
>>>>         if (!dev_maps)
>>>>                 goto out_no_maps;
>>>>
>>>> -       /* removes queue from unused CPUs */
>>>> -       for_each_possible_cpu(cpu) {
>>>> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
>>>> +       /* removes tx-queue from unused CPUs/rx-queues */
>>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>>> +            j < nr_ids;) {
>>>> +               for (i = tc, tci = j * num_tc; i--; tci++)
>>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>>> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
>>>> +               if (!attr_test_mask(j, mask, nr_ids) ||
>>>> +                   !attr_test_online(j, online_mask, nr_ids))
>>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>>>                 for (i = num_tc - tc, tci++; --i; tci++)
>>>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>>> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>
>>>>         /* free map if not active */
>>>>         if (!active) {
>>>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>>>> +               if (type == XPS_MAP_RXQS)
>>>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
>>>> +               else if (type == XPS_MAP_CPUS)
>>>> +                       RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
>>>>                 kfree_rcu(dev_maps, rcu);
>>>>         }
>>>>
>>>> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>         return 0;
>>>>  error:
>>>>         /* remove any maps that we added */
>>>> -       for_each_possible_cpu(cpu) {
>>>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>>>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>>>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>>>> +            j < nr_ids;) {
>>>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>>>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>>>                         map = dev_maps ?
>>>> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
>>>> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>>>>                               NULL;
>>>>                         if (new_map && new_map != map)
>>>>                                 kfree(new_map);
>>>> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>>         kfree(new_dev_maps);
>>>>         return -ENOMEM;
>>>>  }
>>>> +
>>>> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>>> +                       u16 index)
>>>> +{
>>>> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
>>>> +                                    XPS_MAP_CPUS);
>>>> +}
>>>>  EXPORT_SYMBOL(netif_set_xps_queue);
>>>>
>>>>  #endif
>>>> @@ -3400,7 +3464,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>>>         int queue_index = -1;
>>>>
>>>>         rcu_read_lock();
>>>> -       dev_maps = rcu_dereference(dev->xps_maps);
>>>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>>>         if (dev_maps) {
>>>>                 unsigned int tci = skb->sender_cpu - 1;
>>>>
>>>> @@ -3409,7 +3473,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>>>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>>>>                 }
>>>>
>>>> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
>>>> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>>>>                 if (map) {
>>>>                         if (map->len == 1)
>>>>                                 queue_index = map->queues[0];
>>>> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
>>>> index c476f07..d7abd33 100644
>>>> --- a/net/core/net-sysfs.c
>>>> +++ b/net/core/net-sysfs.c
>>>> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>>>>         }
>>>>
>>>>         rcu_read_lock();
>>>> -       dev_maps = rcu_dereference(dev->xps_maps);
>>>> +       dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
>>>>         if (dev_maps) {
>>>>                 for_each_possible_cpu(cpu) {
>>>>                         int i, tci = cpu * num_tc + tc;
>>>>                         struct xps_map *map;
>>>>
>>>> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
>>>> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>>>>                         if (!map)
>>>>                                 continue;
>>>>
>>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2018-05-14 17:18 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-20  1:04 [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Amritha Nambiar
2018-04-20  1:04 ` [net-next PATCH 1/3] net: Refactor XPS for CPUs and " Amritha Nambiar
2018-05-09 20:31   ` Tom Herbert
2018-05-09 20:54     ` Nambiar, Amritha
2018-05-14 16:53       ` Tom Herbert
2018-05-14 17:18         ` Nambiar, Amritha
2018-04-20  1:04 ` [net-next PATCH 2/3] net: Enable Tx queue selection based on " Amritha Nambiar
2018-04-20  1:04 ` [net-next PATCH 3/3] net-sysfs: Add interface for Rx queue map per Tx queue Amritha Nambiar
2018-04-20  2:41 ` [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues Eric Dumazet
2018-05-08 15:15   ` Tom Herbert
2018-05-08 16:02     ` Alexander Duyck
2018-05-08 17:07       ` Eric Dumazet
2018-05-08 17:19         ` Alexander Duyck
2018-05-08 17:56           ` Nambiar, Amritha

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.