All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/4] xfs: Transmit flow steering
@ 2016-08-31  0:00 Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 1/4] net: Set SW hash in skb_set_hash_from_sk Tom Herbert
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Tom Herbert @ 2016-08-31  0:00 UTC (permalink / raw)
  To: davem, netdev, rick.jones2; +Cc: kernel-team

This patch set introduces transmit flow steering. The idea is that we
record the transmit queues in a flow table that is indexed by skbuff.
The flow table entries have two values: the queue_index and the head cnt
of packets from the TX queue. We only allow a queue to change for a flow
if the tail cnt in the TX queue advances beyond the recorded head cnt.
That is the condition that should indicate that all outstanding packets
for the flow have completed transmission so the queue can change.

Tracking the inflight queue is performed as part of BQL. Two fields are
added to netdevice structure: head_cnt and tail_cnt. head_cnt is
incremented in netdev_tx_sent_queue and tail_cnt is incremented in
netdev_tx_completed_queue by the number of packets completed.

This patch set creates /sys/class/net/eth*/xps_dev_flow_table_cnt
which gives the number of entries in the XPS flow table.

Tom Herbert (4):
  net: Set SW hash in skb_set_hash_from_sk
  bql: Add tracking of inflight packets
  net: Add xps_dev_flow_table_cnt
  xfs: Transmit flow steering

 include/linux/netdevice.h | 26 +++++++++++++
 include/net/sock.h        |  6 +--
 net/Kconfig               |  6 +++
 net/core/dev.c            | 93 +++++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c      | 87 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 199 insertions(+), 19 deletions(-)

-- 
2.8.0.rc2

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH RFC 1/4] net: Set SW hash in skb_set_hash_from_sk
  2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
@ 2016-08-31  0:00 ` Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 2/4] bql: Add tracking of inflight packets Tom Herbert
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Tom Herbert @ 2016-08-31  0:00 UTC (permalink / raw)
  To: davem, netdev, rick.jones2; +Cc: kernel-team

Use __skb_set_sw_hash to set the hash in an skbuff from the socket
txhash.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/net/sock.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index c797c57..12e585c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1910,10 +1910,8 @@ static inline void sock_poll_wait(struct file *filp,
 
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 {
-	if (sk->sk_txhash) {
-		skb->l4_hash = 1;
-		skb->hash = sk->sk_txhash;
-	}
+	if (sk->sk_txhash)
+		__skb_set_sw_hash(skb, sk->sk_txhash, true);
 }
 
 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk);
-- 
2.8.0.rc2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH RFC 2/4] bql: Add tracking of inflight packets
  2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 1/4] net: Set SW hash in skb_set_hash_from_sk Tom Herbert
@ 2016-08-31  0:00 ` Tom Herbert
  2016-08-31  9:23   ` Jesper Dangaard Brouer
  2016-08-31 13:08   ` Eric Dumazet
  2016-08-31  0:00 ` [PATCH RFC 3/4] net: Add xps_dev_flow_table_cnt Tom Herbert
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: Tom Herbert @ 2016-08-31  0:00 UTC (permalink / raw)
  To: davem, netdev, rick.jones2; +Cc: kernel-team

Add two fields to netdev_queue as head_cnt and tail_cnt. head_cnt is
incremented for every sent packet in netdev_tx_sent_queue and tail_cnt
is incremented by the number of packets in netdev_tx_completed_queue.
So then the number of inflight packets for a queue is simply
queue->head_cnt - queue->tail_cnt.

Add inflight_pkts to be reported in sys-fs.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/linux/netdevice.h |  4 ++++
 net/core/net-sysfs.c      | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d122be9..487d1df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -592,6 +592,8 @@ struct netdev_queue {
 
 #ifdef CONFIG_BQL
 	struct dql		dql;
+	unsigned int		head_cnt;
+	unsigned int		tail_cnt;
 #endif
 } ____cacheline_aligned_in_smp;
 
@@ -2958,6 +2960,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
 #ifdef CONFIG_BQL
+	dev_queue->head_cnt++;
 	dql_queued(&dev_queue->dql, bytes);
 
 	if (likely(dql_avail(&dev_queue->dql) >= 0))
@@ -2999,6 +3002,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 	if (unlikely(!bytes))
 		return;
 
+	dev_queue->tail_cnt += pkts;
 	dql_completed(&dev_queue->dql, bytes);
 
 	/*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6e4f347..5a33f6a 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1147,6 +1147,16 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
 static struct netdev_queue_attribute bql_inflight_attribute =
 	__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
 
+static ssize_t bql_show_inflight_pkts(struct netdev_queue *queue,
+				      struct netdev_queue_attribute *attr,
+				      char *buf)
+{
+	return sprintf(buf, "%u\n", queue->head_cnt - queue->tail_cnt);
+}
+
+static struct netdev_queue_attribute bql_inflight_pkts_attribute =
+	__ATTR(inflight_pkts, S_IRUGO, bql_show_inflight_pkts, NULL);
+
 #define BQL_ATTR(NAME, FIELD)						\
 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
 				 struct netdev_queue_attribute *attr,	\
@@ -1176,6 +1186,7 @@ static struct attribute *dql_attrs[] = {
 	&bql_limit_min_attribute.attr,
 	&bql_hold_time_attribute.attr,
 	&bql_inflight_attribute.attr,
+	&bql_inflight_pkts_attribute.attr,
 	NULL
 };
 
-- 
2.8.0.rc2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH RFC 3/4] net: Add xps_dev_flow_table_cnt
  2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 1/4] net: Set SW hash in skb_set_hash_from_sk Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 2/4] bql: Add tracking of inflight packets Tom Herbert
@ 2016-08-31  0:00 ` Tom Herbert
  2016-08-31  0:00 ` [PATCH RFC 4/4] xfs: Transmit flow steering Tom Herbert
  2016-09-28 15:13 ` [PATCH RFC 0/4] " Rick Jones
  4 siblings, 0 replies; 11+ messages in thread
From: Tom Herbert @ 2016-08-31  0:00 UTC (permalink / raw)
  To: davem, netdev, rick.jones2; +Cc: kernel-team

Add infrastructure and definitions to create XFS flow tables. This
creates the new sys entry /sys/class/net/eth*/xps_dev_flow_table_cnt

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/linux/netdevice.h | 22 ++++++++++++++
 net/core/net-sysfs.c      | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 487d1df..d30e1bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -736,8 +736,28 @@ struct xps_dev_maps {
 };
 #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
     (nr_cpu_ids * sizeof(struct xps_map *)))
+
+struct xps_dev_flow {
+	union {
+		u64	v64;
+		struct {
+			int		queue_index;
+			unsigned int	queue_ptr;
+		};
+	};
+};
+
+struct xps_dev_flow_table {
+	unsigned int mask;
+	struct rcu_head rcu;
+	struct xps_dev_flow flows[0];
+};
+#define XPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct xps_dev_flow_table) + \
+	((_num) * sizeof(struct xps_dev_flow)))
+
 #endif /* CONFIG_XPS */
 
+
 #define TC_MAX_QUEUE	16
 #define TC_BITMASK	15
 /* HW offloaded queuing disciplines txq count and offset maps */
@@ -1810,6 +1830,8 @@ struct net_device {
 
 #ifdef CONFIG_XPS
 	struct xps_dev_maps __rcu *xps_maps;
+	struct xps_dev_flow_table __rcu *xps_flow_table;
+	unsigned int xps_dev_flow_table_cnt;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_proto __rcu  *egress_cl_list;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5a33f6a..41d0bc9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -503,6 +503,79 @@ static ssize_t phys_switch_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_switch_id);
 
+#ifdef CONFIG_XPS
+static void xps_dev_flow_table_release(struct rcu_head *rcu)
+{
+	struct xps_dev_flow_table *table = container_of(rcu,
+	    struct xps_dev_flow_table, rcu);
+	vfree(table);
+}
+
+static int change_xps_dev_flow_table_cnt(struct net_device *dev,
+					 unsigned long count)
+{
+	unsigned long mask;
+	struct xps_dev_flow_table *table, *old_table;
+	static DEFINE_SPINLOCK(xps_dev_flow_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (count) {
+		mask = count - 1;
+		/* mask = roundup_pow_of_two(count) - 1;
+		 * without overflows...
+		 */
+		while ((mask | (mask >> 1)) != mask)
+			mask |= (mask >> 1);
+		/* On 64 bit arches, must check mask fits in table->mask (u32),
+		 * and on 32bit arches, must check
+		 * XPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
+		 */
+#if BITS_PER_LONG > 32
+		if (mask > (unsigned long)(u32)mask)
+			return -EINVAL;
+#else
+		if (mask > (ULONG_MAX - XPS_DEV_FLOW_TABLE_SIZE(1))
+				/ sizeof(struct xps_dev_flow)) {
+			/* Enforce a limit to prevent overflow */
+			return -EINVAL;
+		}
+#endif
+		table = vmalloc(XPS_DEV_FLOW_TABLE_SIZE(mask + 1));
+		if (!table)
+			return -ENOMEM;
+
+		table->mask = mask;
+		for (count = 0; count <= mask; count++)
+			table->flows[count].queue_index = -1;
+	} else
+		table = NULL;
+
+	spin_lock(&xps_dev_flow_lock);
+	old_table = rcu_dereference_protected(dev->xps_flow_table,
+					      lockdep_is_held(&xps_dev_flow_lock));
+	rcu_assign_pointer(dev->xps_flow_table, table);
+	dev->xps_dev_flow_table_cnt = count;
+	spin_unlock(&xps_dev_flow_lock);
+
+	if (old_table)
+		call_rcu(&old_table->rcu, xps_dev_flow_table_release);
+
+	return 0;
+}
+
+static ssize_t xps_dev_flow_table_cnt_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_xps_dev_flow_table_cnt);
+}
+
+NETDEVICE_SHOW_RW(xps_dev_flow_table_cnt, fmt_dec);
+
+#endif
+
 static struct attribute *net_class_attrs[] = {
 	&dev_attr_netdev_group.attr,
 	&dev_attr_type.attr,
@@ -531,6 +604,9 @@ static struct attribute *net_class_attrs[] = {
 	&dev_attr_phys_port_name.attr,
 	&dev_attr_phys_switch_id.attr,
 	&dev_attr_proto_down.attr,
+#ifdef CONFIG_XPS
+	&dev_attr_xps_dev_flow_table_cnt.attr,
+#endif
 	NULL,
 };
 ATTRIBUTE_GROUPS(net_class);
-- 
2.8.0.rc2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH RFC 4/4] xfs: Transmit flow steering
  2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
                   ` (2 preceding siblings ...)
  2016-08-31  0:00 ` [PATCH RFC 3/4] net: Add xps_dev_flow_table_cnt Tom Herbert
@ 2016-08-31  0:00 ` Tom Herbert
  2016-08-31  3:14   ` Alexander Duyck
  2016-08-31 18:34   ` Chris Mason
  2016-09-28 15:13 ` [PATCH RFC 0/4] " Rick Jones
  4 siblings, 2 replies; 11+ messages in thread
From: Tom Herbert @ 2016-08-31  0:00 UTC (permalink / raw)
  To: davem, netdev, rick.jones2; +Cc: kernel-team

XFS maintains a per device flow table that is indexed by the skbuff
hash. The XFS table is only consulted when there is no queue saved in
a transmit socket for an skbuff.

Each entry in the flow table contains a queue index and a queue
pointer. The queue pointer is set when a queue is chosen using a
flow table entry. This pointer is set to the head pointer in the
transmit queue (which is maintained by BQL).

The new function get_xfs_index looks up flows in the XPS table.
The entry returned gives the last queue a matching flow used. The
returned queue is compared against the normal XPS queue. If they
are different, then we only switch if the tail pointer in the TX
queue has advanced past the pointer saved in the entry. In this
way OOO should be avoided when XPS wants to use a different queue.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 net/Kconfig    |  6 ++++
 net/core/dev.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd34..5e3eddf 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -255,6 +255,12 @@ config XPS
 	depends on SMP
 	default y
 
+config XFS
+	bool
+	depends on XPS
+	depends on BQL
+	default y
+
 config HWBM
        bool
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 1d5c6dd..722e487 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3210,6 +3210,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
+/* Must be called with RCU read_lock */
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
@@ -3217,7 +3218,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	struct xps_map *map;
 	int queue_index = -1;
 
-	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps);
 	if (dev_maps) {
 		map = rcu_dereference(
@@ -3232,7 +3232,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 				queue_index = -1;
 		}
 	}
-	rcu_read_unlock();
 
 	return queue_index;
 #else
@@ -3240,26 +3239,90 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+/* Must be called with RCU read_lock */
+static int get_xfs_index(struct net_device *dev, struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-	int queue_index = sk_tx_queue_get(sk);
+#ifdef CONFIG_XFS
+	struct xps_dev_flow_table *flow_table;
+	struct xps_dev_flow ent;
+	int queue_index;
+	struct netdev_queue *txq;
+	u32 hash;
 
-	if (queue_index < 0 || skb->ooo_okay ||
-	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
-		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+	flow_table = rcu_dereference(dev->xps_flow_table);
+	if (!flow_table)
+		return -1;
 
-		if (queue_index != new_index && sk &&
-		    sk_fullsock(sk) &&
-		    rcu_access_pointer(sk->sk_dst_cache))
-			sk_tx_queue_set(sk, new_index);
+	queue_index = get_xps_queue(dev, skb);
+	if (queue_index < 0)
+		return -1;
 
-		queue_index = new_index;
+	hash = skb_get_hash(skb);
+	if (!hash)
+		return -1;
+
+	ent.v64 = flow_table->flows[hash & flow_table->mask].v64;
+	if (ent.queue_index >= 0 &&
+	    ent.queue_index < dev->real_num_tx_queues) {
+		txq = netdev_get_tx_queue(dev, ent.queue_index);
+		if (queue_index != ent.queue_index) {
+			if ((int)(txq->tail_cnt - ent.queue_ptr) >= 0)  {
+				/* The current queue's tail has advanced
+				 * beyone the last packet that was
+				 * enqueued using the table entry. All
+				 * previous packets sent for this flow
+				 * should have been completed so the
+				 * queue for the flow cna be changed.
+				 */
+				ent.queue_index = queue_index;
+				txq = netdev_get_tx_queue(dev, queue_index);
+			} else {
+				queue_index = ent.queue_index;
+			}
+		}
+	} else {
+		/* Queue from the table was bad, use the new one. */
+		ent.queue_index = queue_index;
+		txq = netdev_get_tx_queue(dev, queue_index);
 	}
 
+	/* Save the updated entry */
+	ent.queue_ptr = txq->head_cnt;
+	flow_table->flows[hash & flow_table->mask].v64 = ent.v64;
+
 	return queue_index;
+#else
+	return = get_xps_queue(dev, skb);
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
+	int new_index;
+
+	if (queue_index < 0) {
+		/* Socket did not provide a queue index, try XFS */
+		new_index = get_xfs_index(dev, skb);
+	} else if (skb->ooo_okay || queue_index >= dev->real_num_tx_queues) {
+		/* Queue index in socket, see if we can find a better one */
+		new_index = get_xps_queue(dev, skb);
+	} else {
+		/* Valid queue in socket and can't send OOO. Just return it */
+		return queue_index;
+	}
+
+	if (new_index < 0) {
+		/* No queue index from flow steering, fallback to hash */
+		new_index = skb_tx_hash(dev, skb);
+	}
+
+	if (queue_index != new_index && sk && sk_fullsock(sk) &&
+	    rcu_access_pointer(sk->sk_dst_cache))
+		sk_tx_queue_set(sk, new_index);
+
+	return new_index;
 }
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-- 
2.8.0.rc2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 4/4] xfs: Transmit flow steering
  2016-08-31  0:00 ` [PATCH RFC 4/4] xfs: Transmit flow steering Tom Herbert
@ 2016-08-31  3:14   ` Alexander Duyck
  2016-08-31 18:34   ` Chris Mason
  1 sibling, 0 replies; 11+ messages in thread
From: Alexander Duyck @ 2016-08-31  3:14 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Netdev, Rick Jones, Kernel Team

On Tue, Aug 30, 2016 at 5:00 PM, Tom Herbert <tom@herbertland.com> wrote:
> XFS maintains a per device flow table that is indexed by the skbuff
> hash. The XFS table is only consulted when there is no queue saved in
> a transmit socket for an skbuff.
>
> Each entry in the flow table contains a queue index and a queue
> pointer. The queue pointer is set when a queue is chosen using a
> flow table entry. This pointer is set to the head pointer in the
> transmit queue (which is maintained by BQL).
>
> The new function get_xfs_index that looks up flows in the XPS table.
> The entry returned gives the last queue a matching flow used. The
> returned queue is compared against the normal XPS queue. If they
> are different, then we only switch if the tail pointer in the TX
> queue has advanced past the pointer saved in the entry. In this
> way OOO should be avoided when XPS wants to use a different queue.
>
> Signed-off-by: Tom Herbert <tom@herbertland.com>

This looks pretty good.  I haven't had a chance to test it though as
it will probably take me a few days.

A few minor items called out below.

Thanks.

- Alex

> ---
>  net/Kconfig    |  6 ++++
>  net/core/dev.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 84 insertions(+), 15 deletions(-)
>
> diff --git a/net/Kconfig b/net/Kconfig
> index 7b6cd34..5e3eddf 100644
> --- a/net/Kconfig
> +++ b/net/Kconfig
> @@ -255,6 +255,12 @@ config XPS
>         depends on SMP
>         default y
>
> +config XFS
> +       bool
> +       depends on XPS
> +       depends on BQL
> +       default y
> +
>  config HWBM
>         bool
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 1d5c6dd..722e487 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3210,6 +3210,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>  }
>  #endif /* CONFIG_NET_EGRESS */
>
> +/* Must be called with RCU read_lock */
>  static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>  {
>  #ifdef CONFIG_XPS
> @@ -3217,7 +3218,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>         struct xps_map *map;
>         int queue_index = -1;
>
> -       rcu_read_lock();
>         dev_maps = rcu_dereference(dev->xps_maps);
>         if (dev_maps) {
>                 map = rcu_dereference(
> @@ -3232,7 +3232,6 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>                                 queue_index = -1;
>                 }
>         }
> -       rcu_read_unlock();
>
>         return queue_index;
>  #else
> @@ -3240,26 +3239,90 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>  #endif
>  }
>
> -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
> +/* Must be called with RCU read_lock */
> +static int get_xfs_index(struct net_device *dev, struct sk_buff *skb)
>  {
> -       struct sock *sk = skb->sk;
> -       int queue_index = sk_tx_queue_get(sk);
> +#ifdef CONFIG_XFS
> +       struct xps_dev_flow_table *flow_table;
> +       struct xps_dev_flow ent;
> +       int queue_index;
> +       struct netdev_queue *txq;
> +       u32 hash;
>
> -       if (queue_index < 0 || skb->ooo_okay ||
> -           queue_index >= dev->real_num_tx_queues) {
> -               int new_index = get_xps_queue(dev, skb);
> -               if (new_index < 0)
> -                       new_index = skb_tx_hash(dev, skb);
> +       flow_table = rcu_dereference(dev->xps_flow_table);
> +       if (!flow_table)
> +               return -1;
>
> -               if (queue_index != new_index && sk &&
> -                   sk_fullsock(sk) &&
> -                   rcu_access_pointer(sk->sk_dst_cache))
> -                       sk_tx_queue_set(sk, new_index);
> +       queue_index = get_xps_queue(dev, skb);
> +       if (queue_index < 0)
> +               return -1;

Actually I think this bit here probably needs to fall back to using
skb_tx_hash if you don't get a usable result.  The problem is you
could have a system that is running with a mix of XFS assigned for
some CPUs and just using skb_tx_hash for others.  We shouldn't steal
flows from the ones selected using skb_tx_hash until they have met the
flow transition criteria.

> -               queue_index = new_index;
> +       hash = skb_get_hash(skb);
> +       if (!hash)
> +               return -1;

I'm not sure the !hash test makes any sense.  Isn't 0 a valid hash value?

> +       ent.v64 = flow_table->flows[hash & flow_table->mask].v64;
> +       if (ent.queue_index >= 0 &&
> +           ent.queue_index < dev->real_num_tx_queues) {
> +               txq = netdev_get_tx_queue(dev, ent.queue_index);
> +               if (queue_index != ent.queue_index) {
> +                       if ((int)(txq->tail_cnt - ent.queue_ptr) >= 0)  {
> +                               /* The current queue's tail has advanced
> +                                * beyone the last packet that was

Small typo here, "beyond" instead of "beyone".

> +                                * enqueued using the table entry. All
> +                                * previous packets sent for this flow
> +                                * should have been completed so the
> +                                * queue for the flow cna be changed.
> +                                */
> +                               ent.queue_index = queue_index;
> +                               txq = netdev_get_tx_queue(dev, queue_index);

You might want to consider using a goto label instead of duplicating
this block of code here and in the else section below.

> +                       } else {
> +                               queue_index = ent.queue_index;
> +                       }
> +               }
> +       } else {
> +               /* Queue from the table was bad, use the new one. */
> +               ent.queue_index = queue_index;
> +               txq = netdev_get_tx_queue(dev, queue_index);
>         }
>
> +       /* Save the updated entry */
> +       ent.queue_ptr = txq->head_cnt;
> +       flow_table->flows[hash & flow_table->mask].v64 = ent.v64;
> +
>         return queue_index;
> +#else
> +       return = get_xps_queue(dev, skb);
> +#endif
> +}
> +
> +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
> +{
> +       struct sock *sk = skb->sk;
> +       int queue_index = sk_tx_queue_get(sk);
> +       int new_index;
> +
> +       if (queue_index < 0) {
> +               /* Socket did not provide a queue index, try XFS */
> +               new_index = get_xfs_index(dev, skb);
> +       } else if (skb->ooo_okay || queue_index >= dev->real_num_tx_queues) {
> +               /* Queue index in socket, see if we can find a better one */
> +               new_index = get_xps_queue(dev, skb);
> +       } else {
> +               /* Valid queue in socket and can't send OOO. Just return it */
> +               return queue_index;
> +       }
> +
> +       if (new_index < 0) {
> +               /* No queue index from flow steering, fallback to hash */
> +               new_index = skb_tx_hash(dev, skb);
> +       }

You could just move the comment above the if statement and drop the braces.

> +
> +       if (queue_index != new_index && sk && sk_fullsock(sk) &&
> +           rcu_access_pointer(sk->sk_dst_cache))
> +               sk_tx_queue_set(sk, new_index);
> +
> +       return new_index;
>  }
>
>  struct netdev_queue *netdev_pick_tx(struct net_device *dev,
> --
> 2.8.0.rc2
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 2/4] bql: Add tracking of inflight packets
  2016-08-31  0:00 ` [PATCH RFC 2/4] bql: Add tracking of inflight packets Tom Herbert
@ 2016-08-31  9:23   ` Jesper Dangaard Brouer
  2016-08-31 13:08   ` Eric Dumazet
  1 sibling, 0 replies; 11+ messages in thread
From: Jesper Dangaard Brouer @ 2016-08-31  9:23 UTC (permalink / raw)
  To: Tom Herbert
  Cc: brouer, davem, netdev, rick.jones2, kernel-team, Achiad Shochat

On Tue, 30 Aug 2016 17:00:32 -0700
Tom Herbert <tom@herbertland.com> wrote:

> Add two fields to netdev_queue as head_cnt and tail_cnt. head_cnt is
> incremented for every sent packet in netdev_tx_sent_queue and tail_cnt
> is incremented by the number of packets in netdev_tx_completed_queue.
> So then the number of inflight packets for a queue is simply
> queue->head_cnt - queue->tail_cnt.
> 
> Add inflight_pkts to be reported in sys-fs.

I like the idea of BQL tracking inflight packets, because we could use this
to determine _when_ qdisc bulking could be beneficial (activating xmit_more).

Idea from NetDev1.1 slides[1] page 17, and experiment with BQL
byte_queue_limits/limit_max on page 18 (which would really need a pkt
count not a byte count)

[1] http://people.netfilter.org/hawk/presentations/NetDev1.1_2016/net_performance_BoF.pdf
[2] http://people.netfilter.org/hawk/presentations/NetDev1.1_2016/links.html
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 2/4] bql: Add tracking of inflight packets
  2016-08-31  0:00 ` [PATCH RFC 2/4] bql: Add tracking of inflight packets Tom Herbert
  2016-08-31  9:23   ` Jesper Dangaard Brouer
@ 2016-08-31 13:08   ` Eric Dumazet
  1 sibling, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2016-08-31 13:08 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, rick.jones2, kernel-team

On Tue, 2016-08-30 at 17:00 -0700, Tom Herbert wrote:
> Add two fields to netdev_queue as head_cnt and tail_cnt. head_cnt is
> incremented for every sent packet in netdev_tx_sent_queue and tail_cnt
> is incremented by the number of packets in netdev_tx_completed_queue.
> So then the number of inflight packets for a queue is simply
> queue->head_cnt - queue->tail_cnt.
> 
> Add inflight_pkts to be reported in sys-fs.
> 
> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
>  include/linux/netdevice.h |  4 ++++
>  net/core/net-sysfs.c      | 11 +++++++++++
>  2 files changed, 15 insertions(+)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index d122be9..487d1df 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -592,6 +592,8 @@ struct netdev_queue {
>  
>  #ifdef CONFIG_BQL
>  	struct dql		dql;
> +	unsigned int		head_cnt;
> +	unsigned int		tail_cnt;
>  #endif
>  } ____cacheline_aligned_in_smp;
>  

You probably should put these fields in the appropriate cache lines of
"struct dql" : It will provide better cache behavior and fill holes.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 4/4] xfs: Transmit flow steering
  2016-08-31  0:00 ` [PATCH RFC 4/4] xfs: Transmit flow steering Tom Herbert
  2016-08-31  3:14   ` Alexander Duyck
@ 2016-08-31 18:34   ` Chris Mason
  1 sibling, 0 replies; 11+ messages in thread
From: Chris Mason @ 2016-08-31 18:34 UTC (permalink / raw)
  To: Tom Herbert, davem, netdev, rick.jones2; +Cc: kernel-team



On 08/30/2016 08:00 PM, Tom Herbert wrote:
> XFS maintains a per device flow table that is indexed by the skbuff
> hash. The XFS table is only consulted when there is no queue saved in
> a transmit socket for an skbuff.
>
> Each entry in the flow table contains a queue index and a queue
> pointer. The queue pointer is set when a queue is chosen using a
> flow table entry. This pointer is set to the head pointer in the
> transmit queue (which is maintained by BQL).
>
> The new function get_xfs_index that looks up flows in the XPS table.
> The entry returned gives the last queue a matching flow used. The
> returned queue is compared against the normal XPS queue. If they
> are different, then we only switch if the tail pointer in the TX
> queue has advanced past the pointer saved in the entry. In this
> way OOO should be avoided when XPS wants to use a different queue.
>

I'd love for Dave Chinner to get some networking bug reports, but maybe 
we shouldn't call it XFS?

At least CONFIG_XFS should be something else.  It doesn't conflict now 
because we have CONFIG_XFS_FS, but even CONFIG_XFS_NET sounds like it's 
related to the filesystem instead of transmit flows.

[ Sorry, four patches in and all I do is complain about the name ]

-chris

> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
>  net/Kconfig    |  6 ++++
>  net/core/dev.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 84 insertions(+), 15 deletions(-)
>
> diff --git a/net/Kconfig b/net/Kconfig
> index 7b6cd34..5e3eddf 100644
> --- a/net/Kconfig
> +++ b/net/Kconfig
> @@ -255,6 +255,12 @@ config XPS
>  	depends on SMP
>  	default y
>
> +config XFS
> +	bool
> +	depends on XPS
> +	depends on BQL
> +	default y
> +
>  config HWBM
>         bool

...

> -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
> +/* Must be called with RCU read_lock */
> +static int get_xfs_index(struct net_device *dev, struct sk_buff *skb)
>  {
> -	struct sock *sk = skb->sk;
> -	int queue_index = sk_tx_queue_get(sk);
> +#ifdef CONFIG_XFS
> +	struct xps_dev_flow_table *flow_table;
> +	struct xps_dev_flow ent;
> +	int queue_index;
> +	struct netdev_queue *txq;
> +	u32 hash;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 0/4] xfs: Transmit flow steering
  2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
                   ` (3 preceding siblings ...)
  2016-08-31  0:00 ` [PATCH RFC 4/4] xfs: Transmit flow steering Tom Herbert
@ 2016-09-28 15:13 ` Rick Jones
  2016-10-07  9:18   ` Juerg Haefliger
  4 siblings, 1 reply; 11+ messages in thread
From: Rick Jones @ 2016-09-28 15:13 UTC (permalink / raw)
  To: Tom Herbert, davem, netdev; +Cc: kernel-team, Balliet, Drew, Juerg Haefliger


Here is a quick look at performance tests for the result of trying the
prototype fix for the packet reordering problem with VMs sending over
an XPS-configured NIC.  In particular, the Emulex/Avago/Broadcom
Skyhawk.  The fix was applied to a 4.4 kernel.

Before: 3884 Mbit/s
After: 8897 Mbit/s

That was from a VM on a node with a Skyhawk and 2 E5-2640 processors
to baremetal E5-2640 with a BE3.  Physical MTU was 1500, the VM's
vNIC's MTU was 1400.  Systems were HPE ProLiants in OS Control Mode
for power management, with the "performance" frequency governor
loaded. An OpenStack Mitaka setup with Distributed Virtual Router.

We had some other NIC types in the setup as well.  XPS was also
enabled on the ConnectX3-Pro.  It was not enabled on the 82599ES (a
function of the kernel being used, which had it disabled from the
first reports of XPS negatively affecting VM traffic at the beginning
of the year)

Average Mbit/s From NIC type To Bare Metal BE3:
NIC Type,
  CPU on VM Host            Before        After
------------------------------------------------
ConnectX-3 Pro,E5-2670v3    9224         9271
BE3, E5-2640                9016	 9022
82599, E5-2640              9192	 9003
BCM57840, E5-2640           9213	 9153
Skyhawk, E5-2640            3884	 8897

For completeness:
Average Mbit/s To NIC type from Bare Metal BE3:
NIC Type,
  CPU on VM Host            Before        After
------------------------------------------------
ConnectX-3 Pro,E5-2670v3    9322         9144
BE3, E5-2640		    9074	 9017
82599, E5-2640              8670	 8564
BCM57840, E5-2640           2468 *	 7979
Skyhawk, E5-2640            8897	 9269

* This is the Busted bnx2x NIC FW GRO implementation issue.  It was
   not visible in the "After" because the system was set up to disable
   the NIC FW GRO by the time it booted on the fix kernel.

Average Transactions/s Between NIC type and Bare Metal BE3:
NIC Type,
  CPU on VM Host            Before        After
------------------------------------------------
ConnectX-3 Pro,E5-2670v3   12421         12612
BE3, E5-2640		    8178	  8484
82599, E5-2640              8499	  8549
BCM57840, E5-2640           8544	  8560
Skyhawk, E5-2640            8537	  8701

happy benchmarking,

Drew Balliet
Juerg Haefliger
rick jones

The semi-cooked results with additional statistics:

554M  - BE3
544+M - ConnectX-3 Pro
560M - 82599ES
630M - BCM57840
650M - Skyhawk

(substitute is simply replacing a system name with the model of NIC and CPU)
Bulk To (South) and From (North) VM, Before:
$ ../substitute.sh 
vxlan_554m_control_performance_gvnr_dvr_northsouth_stream.log | 
~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 
4 -f 7 -f 8
Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
North,560M,E5-2640,554FLB,E5-2640,8148.090,9048.830,9235.400,9192.868,9315.980,9338.845,9339.500,113
North,630M,E5-2640,554FLB,E5-2640,8909.980,9113.238,9234.750,9213.140,9299.442,9336.206,9337.830,47
North,544+M,E5-2670v3,554FLB,E5-2640,9013.740,9182.546,9229.620,9224.025,9264.036,9299.206,9301.970,99
North,650M,E5-2640,554FLB,E5-2640,3187.680,3393.724,3796.160,3884.765,4405.096,4941.391,4956.300,129
North,554M,E5-2640,554FLB,E5-2640,8700.930,8855.768,9026.030,9016.061,9158.846,9213.687,9226.150,135
South,554FLB,E5-2640,560M,E5-2640,7754.350,8193.114,8718.540,8670.612,9026.436,9262.355,9285.010,113
South,554FLB,E5-2640,630M,E5-2640,1897.660,2068.290,2514.430,2468.323,2787.162,2942.934,2957.250,53
South,554FLB,E5-2640,544+M,E5-2670v3,9298.260,9314.432,9323.220,9322.207,9328.324,9330.704,9331.080,100
South,554FLB,E5-2640,650M,E5-2640,8407.050,8907.136,9304.390,9206.776,9321.320,9325.347,9326.410,103
South,554FLB,E5-2640,554M,E5-2640,7844.900,8632.530,9199.385,9074.535,9308.070,9319.224,9322.360,132
0 too-short lines ignored.

Bulk To (South) and From (North) VM, After:

$ ../substitute.sh 
vxlan_554m_control_performance_gvnr_xpsfix_dvr_northsouth_stream.log | 
~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 
4 -f 7 -f 8
Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
North,560M,E5-2640,554FLB,E5-2640,7576.790,8213.890,9182.870,9003.190,9295.975,9315.878,9318.160,36
North,630M,E5-2640,554FLB,E5-2640,8811.800,8924.000,9206.660,9153.076,9306.287,9315.152,9315.790,12
North,544+M,E5-2670v3,554FLB,E5-2640,9135.990,9228.520,9277.465,9271.875,9324.545,9339.604,9339.780,46
North,650M,E5-2640,554FLB,E5-2640,8133.420,8483.340,8995.040,8897.779,9129.056,9165.230,9165.860,43
North,554M,E5-2640,554FLB,E5-2640,8438.390,8879.150,9048.590,9022.813,9181.540,9248.650,9297.660,101
South,554FLB,E5-2640,630M,E5-2640,7347.120,7592.565,7951.325,7979.951,8365.400,8575.837,8579.890,16
South,554FLB,E5-2640,560M,E5-2640,7719.510,8044.496,8602.750,8564.741,9172.824,9248.686,9259.070,45
South,554FLB,E5-2640,544+M,E5-2670v3,8838.660,8907.402,9112.335,9114.040,9326.510,9329.062,9329.990,52
South,554FLB,E5-2640,650M,E5-2640,8699.660,9204.378,9307.940,9269.755,9321.060,9328.007,9331.370,58
South,554FLB,E5-2640,554M,E5-2640,7893.310,8483.182,9111.070,9017.273,9314.984,9322.822,9326.750,103

Request/Response Before:
$ ../substitute.sh 
vxlan_554m_control_performance_gvnr_dvr_northsouth_rr.log | 
~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 
4 -f 7 -f 8
Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
RR,554FLB,E5-2640,560M,E5-2640,6971.540,8356.420,8515.275,8499.397,8717.740,9023.489,9114.990,186
RR,554FLB,E5-2640,630M,E5-2640,7538.530,8461.090,8532.970,8544.005,8665.414,8895.486,9003.330,119
RR,554FLB,E5-2640,544+M,E5-2670v3,8825.970,12279.268,12504.175,12421.076,12646.219,12742.536,12831.490,238
RR,554FLB,E5-2640,650M,E5-2640,7152.220,8425.368,8535.370,8537.314,8757.256,9046.692,9128.290,219

Request/Response After:

$ ../substitute.sh 
vxlan_554m_control_performance_gvnr_xpsfix_dvr_northsouth_rr.log | 
~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 
4 -f 7 -f 8
Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
RR,554FLB,E5-2640,560M,E5-2640,7385.910,8411.654,8568.280,8549.659,8724.006,8874.092,8971.860,85
RR,554FLB,E5-2640,630M,E5-2640,8265.030,8472.924,8543.215,8560.117,8657.339,8900.349,8902.260,34
RR,554FLB,E5-2640,544+M,E5-2670v3,12365.830,12446.182,12597.575,12612.538,12790.654,12939.399,12968.120,78
RR,554FLB,E5-2640,650M,E5-2640,8391.870,8516.924,8672.000,8701.399,8977.302,9100.178,9130.380,89
RR,554FLB,E5-2640,554M,E5-2640,6859.680,8390.240,8481.835,8484.884,8649.290,8850.360,8976.960,226

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH RFC 0/4] xfs: Transmit flow steering
  2016-09-28 15:13 ` [PATCH RFC 0/4] " Rick Jones
@ 2016-10-07  9:18   ` Juerg Haefliger
  0 siblings, 0 replies; 11+ messages in thread
From: Juerg Haefliger @ 2016-10-07  9:18 UTC (permalink / raw)
  To: Rick Jones, Tom Herbert, davem, netdev; +Cc: kernel-team, Balliet, Drew


[-- Attachment #1.1: Type: text/plain, Size: 7197 bytes --]

As Rick states, this fixes a performance issue with the 4.4 kernel for us.

Tested-by: Juerg Haefliger <juerg.haefliger@hpe.com>


On 09/28/2016 05:13 PM, Rick Jones wrote:
> 
> Here is a quick look at performance tests for the result of trying the
> prototype fix for the packet reordering problem with VMs sending over
> an XPS-configured NIC.  In particular, the Emulex/Avago/Broadcom
> Skyhawk.  The fix was applied to a 4.4 kernel.
> 
> Before: 3884 Mbit/s
> After: 8897 Mbit/s
> 
> That was from a VM on a node with a Skyhawk and 2 E5-2640 processors
> to baremetal E5-2640 with a BE3.  Physical MTU was 1500, the VM's
> vNIC's MTU was 1400.  Systems were HPE ProLiants in OS Control Mode
> for power management, with the "performance" frequency governor
> loaded. An OpenStack Mitaka setup with Distributed Virtual Router.
> 
> We had some other NIC types in the setup as well.  XPS was also
> enabled on the ConnectX3-Pro.  It was not enabled on the 82599ES (a
> function of the kernel being used, which had it disabled from the
> first reports of XPS negatively affecting VM traffic at the beginning
> of the year)
> 
> Average Mbit/s From NIC type To Bare Metal BE3:
> NIC Type,
>  CPU on VM Host            Before        After
> ------------------------------------------------
> ConnectX-3 Pro,E5-2670v3    9224         9271
> BE3, E5-2640                9016     9022
> 82599, E5-2640              9192     9003
> BCM57840, E5-2640           9213     9153
> Skyhawk, E5-2640            3884     8897
> 
> For completeness:
> Average Mbit/s To NIC type from Bare Metal BE3:
> NIC Type,
>  CPU on VM Host            Before        After
> ------------------------------------------------
> ConnectX-3 Pro,E5-2670v3    9322         9144
> BE3, E5-2640            9074     9017
> 82599, E5-2640              8670     8564
> BCM57840, E5-2640           2468 *     7979
> Skyhawk, E5-2640            8897     9269
> 
> * This is the Busted bnx2x NIC FW GRO implementation issue.  It was
>   not visible in the "After" because the system was set up to disable
>   the NIC FW GRO by the time it booted on the fix kernel.
> 
> Average Transactions/s Between NIC type and Bare Metal BE3:
> NIC Type,
>  CPU on VM Host            Before        After
> ------------------------------------------------
> ConnectX-3 Pro,E5-2670v3   12421         12612
> BE3, E5-2640            8178      8484
> 82599, E5-2640              8499      8549
> BCM57840, E5-2640           8544      8560
> Skyhawk, E5-2640            8537      8701
> 
> happy benchmarking,
> 
> Drew Balliet
> Juerg Haefliger
> rick jones
> 
> The semi-cooked results with additional statistics:
> 
> 554M  - BE3
> 544+M - ConnectX-3 Pro
> 560M - 82599ES
> 630M - BCM57840
> 650M - Skyhawk
> 
> (substitute is simply replacing a system name with the model of NIC and CPU)
> Bulk To (South) and From (North) VM, Before:
> $ ../substitute.sh vxlan_554m_control_performance_gvnr_dvr_northsouth_stream.log |
> ~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 4 -f 7 -f 8
> Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
> North,560M,E5-2640,554FLB,E5-2640,8148.090,9048.830,9235.400,9192.868,9315.980,9338.845,9339.500,113
> North,630M,E5-2640,554FLB,E5-2640,8909.980,9113.238,9234.750,9213.140,9299.442,9336.206,9337.830,47
> North,544+M,E5-2670v3,554FLB,E5-2640,9013.740,9182.546,9229.620,9224.025,9264.036,9299.206,9301.970,99
> North,650M,E5-2640,554FLB,E5-2640,3187.680,3393.724,3796.160,3884.765,4405.096,4941.391,4956.300,129
> North,554M,E5-2640,554FLB,E5-2640,8700.930,8855.768,9026.030,9016.061,9158.846,9213.687,9226.150,135
> South,554FLB,E5-2640,560M,E5-2640,7754.350,8193.114,8718.540,8670.612,9026.436,9262.355,9285.010,113
> South,554FLB,E5-2640,630M,E5-2640,1897.660,2068.290,2514.430,2468.323,2787.162,2942.934,2957.250,53
> South,554FLB,E5-2640,544+M,E5-2670v3,9298.260,9314.432,9323.220,9322.207,9328.324,9330.704,9331.080,100
> South,554FLB,E5-2640,650M,E5-2640,8407.050,8907.136,9304.390,9206.776,9321.320,9325.347,9326.410,103
> South,554FLB,E5-2640,554M,E5-2640,7844.900,8632.530,9199.385,9074.535,9308.070,9319.224,9322.360,132
> 0 too-short lines ignored.
> 
> Bulk To (South) and From (North) VM, After:
> 
> $ ../substitute.sh vxlan_554m_control_performance_gvnr_xpsfix_dvr_northsouth_stream.log |
> ~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 4 -f 7 -f 8
> Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
> North,560M,E5-2640,554FLB,E5-2640,7576.790,8213.890,9182.870,9003.190,9295.975,9315.878,9318.160,36
> North,630M,E5-2640,554FLB,E5-2640,8811.800,8924.000,9206.660,9153.076,9306.287,9315.152,9315.790,12
> North,544+M,E5-2670v3,554FLB,E5-2640,9135.990,9228.520,9277.465,9271.875,9324.545,9339.604,9339.780,46
> North,650M,E5-2640,554FLB,E5-2640,8133.420,8483.340,8995.040,8897.779,9129.056,9165.230,9165.860,43
> North,554M,E5-2640,554FLB,E5-2640,8438.390,8879.150,9048.590,9022.813,9181.540,9248.650,9297.660,101
> South,554FLB,E5-2640,630M,E5-2640,7347.120,7592.565,7951.325,7979.951,8365.400,8575.837,8579.890,16
> South,554FLB,E5-2640,560M,E5-2640,7719.510,8044.496,8602.750,8564.741,9172.824,9248.686,9259.070,45
> South,554FLB,E5-2640,544+M,E5-2670v3,8838.660,8907.402,9112.335,9114.040,9326.510,9329.062,9329.990,52
> South,554FLB,E5-2640,650M,E5-2640,8699.660,9204.378,9307.940,9269.755,9321.060,9328.007,9331.370,58
> South,554FLB,E5-2640,554M,E5-2640,7893.310,8483.182,9111.070,9017.273,9314.984,9322.822,9326.750,103
> 
> Request/Response Before:
> $ ../substitute.sh vxlan_554m_control_performance_gvnr_dvr_northsouth_rr.log |
> ~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 4 -f 7 -f 8
> Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
> RR,554FLB,E5-2640,560M,E5-2640,6971.540,8356.420,8515.275,8499.397,8717.740,9023.489,9114.990,186
> RR,554FLB,E5-2640,630M,E5-2640,7538.530,8461.090,8532.970,8544.005,8665.414,8895.486,9003.330,119
> RR,554FLB,E5-2640,544+M,E5-2670v3,8825.970,12279.268,12504.175,12421.076,12646.219,12742.536,12831.490,238
> 
> RR,554FLB,E5-2640,650M,E5-2640,7152.220,8425.368,8535.370,8537.314,8757.256,9046.692,9128.290,219
> 
> Request/Response After:
> 
> $ ../substitute.sh vxlan_554m_control_performance_gvnr_xpsfix_dvr_northsouth_rr.log |
> ~/netperf2_trunk/doc/examples/parse_single_stream.py -r -5 -f 1 -f 3 -f 4 -f 7 -f 8
> Field1,Field3,Field4,Field7,Field8,Min,P10,Median,Average,P90,P99,Max,Count
> RR,554FLB,E5-2640,560M,E5-2640,7385.910,8411.654,8568.280,8549.659,8724.006,8874.092,8971.860,85
> RR,554FLB,E5-2640,630M,E5-2640,8265.030,8472.924,8543.215,8560.117,8657.339,8900.349,8902.260,34
> RR,554FLB,E5-2640,544+M,E5-2670v3,12365.830,12446.182,12597.575,12612.538,12790.654,12939.399,12968.120,78
> 
> RR,554FLB,E5-2640,650M,E5-2640,8391.870,8516.924,8672.000,8701.399,8977.302,9100.178,9130.380,89
> RR,554FLB,E5-2640,554M,E5-2640,6859.680,8390.240,8481.835,8484.884,8649.290,8850.360,8976.960,226
> 


-- 
Juerg Haefliger
Hewlett Packard Enterprise


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2016-10-07  9:18 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-31  0:00 [PATCH RFC 0/4] xfs: Transmit flow steering Tom Herbert
2016-08-31  0:00 ` [PATCH RFC 1/4] net: Set SW hash in skb_set_hash_from_sk Tom Herbert
2016-08-31  0:00 ` [PATCH RFC 2/4] bql: Add tracking of inflight packets Tom Herbert
2016-08-31  9:23   ` Jesper Dangaard Brouer
2016-08-31 13:08   ` Eric Dumazet
2016-08-31  0:00 ` [PATCH RFC 3/4] net: Add xps_dev_flow_table_cnt Tom Herbert
2016-08-31  0:00 ` [PATCH RFC 4/4] xfs: Transmit flow steering Tom Herbert
2016-08-31  3:14   ` Alexander Duyck
2016-08-31 18:34   ` Chris Mason
2016-09-28 15:13 ` [PATCH RFC 0/4] " Rick Jones
2016-10-07  9:18   ` Juerg Haefliger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.