All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-06-13 19:36 ` Yidong Ren
  0 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-06-13 19:36 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel
  Cc: Yidong Ren

From: Yidong Ren <yidren@microsoft.com>

This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters exist in current code. Exposing these
counters will help troubleshooting performance issues.

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes in v2:
  - Remove cpp style comment
  - Resubmit after freeze

 drivers/net/hyperv/hyperv_net.h |  11 +++++
 drivers/net/hyperv/netvsc_drv.c | 104 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 23304ac..c825353 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7b18a8c..6803aae 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1105,6 +1105,66 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats
+					__percpu *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			per_cpu_ptr(pcpu_tot, i);
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			per_cpu_ptr(pcpu_tot, nvchan->channel->target_cpu);
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1202,6 +1262,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1213,6 +1290,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1228,6 +1308,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_PCPU_STATS_LEN
 			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
@@ -1242,9 +1323,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats __percpu *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1256,6 +1338,17 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 
+	pcpu_sum = alloc_percpu(struct netvsc_ethtool_pcpu_stats);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum =
+			per_cpu_ptr(pcpu_sum, cpu);
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	free_percpu(pcpu_sum);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1283,7 +1376,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1300,6 +1393,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-06-13 19:36 ` Yidong Ren
  0 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-06-13 19:36 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel

From: Yidong Ren <yidren@microsoft.com>

This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters exist in current code. Exposing these
counters will help troubleshooting performance issues.

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes in v2:
  - Remove cpp style comment
  - Resubmit after freeze

 drivers/net/hyperv/hyperv_net.h |  11 +++++
 drivers/net/hyperv/netvsc_drv.c | 104 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 23304ac..c825353 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7b18a8c..6803aae 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1105,6 +1105,66 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats
+					__percpu *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			per_cpu_ptr(pcpu_tot, i);
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			per_cpu_ptr(pcpu_tot, nvchan->channel->target_cpu);
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1202,6 +1262,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1213,6 +1290,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1228,6 +1308,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_PCPU_STATS_LEN
 			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
@@ -1242,9 +1323,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats __percpu *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1256,6 +1338,17 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 
+	pcpu_sum = alloc_percpu(struct netvsc_ethtool_pcpu_stats);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum =
+			per_cpu_ptr(pcpu_sum, cpu);
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	free_percpu(pcpu_sum);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1283,7 +1376,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1300,6 +1393,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 19:36 ` Yidong Ren
@ 2018-06-13 20:57   ` Eric Dumazet
  -1 siblings, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2018-06-13 20:57 UTC (permalink / raw)
  To: Yidong Ren, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel
  Cc: Yidong Ren



On 06/13/2018 12:36 PM, Yidong Ren wrote:
> From: Yidong Ren <yidren@microsoft.com>
> 
> This patch implements following ethtool stats fields for netvsc:
> cpu<n>_tx/rx_packets/bytes
> cpu<n>_vf_tx/rx_packets/bytes

...
>  
> +	pcpu_sum = alloc_percpu(struct netvsc_ethtool_pcpu_stats);
> +	netvsc_get_pcpu_stats(dev, pcpu_sum);
> +	for_each_present_cpu(cpu) {
> +		struct netvsc_ethtool_pcpu_stats *this_sum =
> +			per_cpu_ptr(pcpu_sum, cpu);
> +		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
> +			data[i++] = *(u64 *)((void *)this_sum
> +					     + pcpu_stats[j].offset);
> +	}
> +	free_percpu(pcpu_sum);
>


Using alloc_percpu() / free_percpu() for a short section of code makes no sense.

You actually want to allocate memory local to this cpu, possibly in one chunk,
not spread all over the places.

kvmalloc(nr_cpu_ids * sizeof(struct netvsc_ethtool_pcpu_stats))  should be really better,
since it would most of the time be satisfied by a single kmalloc()


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-06-13 20:57   ` Eric Dumazet
  0 siblings, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2018-06-13 20:57 UTC (permalink / raw)
  To: Yidong Ren, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel



On 06/13/2018 12:36 PM, Yidong Ren wrote:
> From: Yidong Ren <yidren@microsoft.com>
> 
> This patch implements following ethtool stats fields for netvsc:
> cpu<n>_tx/rx_packets/bytes
> cpu<n>_vf_tx/rx_packets/bytes

...
>  
> +	pcpu_sum = alloc_percpu(struct netvsc_ethtool_pcpu_stats);
> +	netvsc_get_pcpu_stats(dev, pcpu_sum);
> +	for_each_present_cpu(cpu) {
> +		struct netvsc_ethtool_pcpu_stats *this_sum =
> +			per_cpu_ptr(pcpu_sum, cpu);
> +		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
> +			data[i++] = *(u64 *)((void *)this_sum
> +					     + pcpu_stats[j].offset);
> +	}
> +	free_percpu(pcpu_sum);
>


Using alloc_percpu() / free_percpu() for a short section of code makes no sense.

You actually want to allocate memory local to this cpu, possibly in one chunk,
not spread all over the places.

kvmalloc(nr_cpu_ids * sizeof(struct netvsc_ethtool_pcpu_stats))  should be really better,
since it would most of the time be satisfied by a single kmalloc()

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 20:57   ` Eric Dumazet
  (?)
@ 2018-06-13 21:07   ` Yidong Ren
  -1 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-06-13 21:07 UTC (permalink / raw)
  To: Eric Dumazet, Yidong Ren, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, David S. Miller, devel, netdev, linux-kernel

> From: Eric Dumazet <eric.dumazet@gmail.com>
> You actually want to allocate memory local to this cpu, possibly in one chunk,
> not spread all over the places.
> 
> kvmalloc(nr_cpu_ids * sizeof(struct netvsc_ethtool_pcpu_stats))  should be
> really better, since it would most of the time be satisfied by a single kmalloc()

Got it. I'm just trying to allocate memory for each cpu. It doesn't have to be __percpu variable.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 19:36 ` Yidong Ren
  (?)
  (?)
@ 2018-06-13 21:48 ` Stephen Hemminger
  2018-06-13 22:03   ` Yidong Ren
  -1 siblings, 1 reply; 18+ messages in thread
From: Stephen Hemminger @ 2018-06-13 21:48 UTC (permalink / raw)
  To: Yidong Ren
  Cc: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel

On Wed, 13 Jun 2018 12:36:08 -0700
Yidong Ren <yidren@linuxonhyperv.com> wrote:

> From: Yidong Ren <yidren@microsoft.com>
> 
> This patch implements following ethtool stats fields for netvsc:
> cpu<n>_tx/rx_packets/bytes
> cpu<n>_vf_tx/rx_packets/bytes
> 
> Corresponding per-cpu counters exist in current code. Exposing these
> counters will help troubleshooting performance issues.
> 
> Signed-off-by: Yidong Ren <yidren@microsoft.com>
> ---
> Changes in v2:
>   - Remove cpp style comment
>   - Resubmit after freeze
> 
>  drivers/net/hyperv/hyperv_net.h |  11 +++++
>  drivers/net/hyperv/netvsc_drv.c | 104 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 113 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 23304ac..c825353 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
>  	unsigned long wake_queue;
>  };
>  
> +struct netvsc_ethtool_pcpu_stats {
> +	u64     rx_packets;
> +	u64     rx_bytes;
> +	u64     tx_packets;
> +	u64     tx_bytes;
> +	u64     vf_rx_packets;
> +	u64     vf_rx_bytes;
> +	u64     vf_tx_packets;
> +	u64     vf_tx_bytes;
> +};
> +
>  struct netvsc_vf_pcpu_stats {
>  	u64     rx_packets;
>  	u64     rx_bytes;
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index 7b18a8c..6803aae 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -1105,6 +1105,66 @@ static void netvsc_get_vf_stats(struct net_device *net,
>  	}
>  }
>  
> +static void netvsc_get_pcpu_stats(struct net_device *net,
> +				  struct netvsc_ethtool_pcpu_stats
> +					__percpu *pcpu_tot)
> +{
> +	struct net_device_context *ndev_ctx = netdev_priv(net);
> +	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
> +	int i;
> +
> +	/* fetch percpu stats of vf */
> +	for_each_possible_cpu(i) {
> +		const struct netvsc_vf_pcpu_stats *stats =
> +			per_cpu_ptr(ndev_ctx->vf_stats, i);
> +		struct netvsc_ethtool_pcpu_stats *this_tot =
> +			per_cpu_ptr(pcpu_tot, i);
> +		unsigned int start;
> +
> +		do {
> +			start = u64_stats_fetch_begin_irq(&stats->syncp);
> +			this_tot->vf_rx_packets = stats->rx_packets;
> +			this_tot->vf_tx_packets = stats->tx_packets;
> +			this_tot->vf_rx_bytes = stats->rx_bytes;
> +			this_tot->vf_tx_bytes = stats->tx_bytes;
> +		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
> +		this_tot->rx_packets = this_tot->vf_rx_packets;
> +		this_tot->tx_packets = this_tot->vf_tx_packets;
> +		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
> +		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
> +	}
> +
> +	/* fetch percpu stats of netvsc */
> +	for (i = 0; i < nvdev->num_chn; i++) {
> +		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
> +		const struct netvsc_stats *stats;
> +		struct netvsc_ethtool_pcpu_stats *this_tot =
> +			per_cpu_ptr(pcpu_tot, nvchan->channel->target_cpu);
> +		u64 packets, bytes;
> +		unsigned int start;
> +
> +		stats = &nvchan->tx_stats;
> +		do {
> +			start = u64_stats_fetch_begin_irq(&stats->syncp);
> +			packets = stats->packets;
> +			bytes = stats->bytes;
> +		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
> +
> +		this_tot->tx_bytes	+= bytes;
> +		this_tot->tx_packets	+= packets;
> +
> +		stats = &nvchan->rx_stats;
> +		do {
> +			start = u64_stats_fetch_begin_irq(&stats->syncp);
> +			packets = stats->packets;
> +			bytes = stats->bytes;
> +		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
> +
> +		this_tot->rx_bytes	+= bytes;
> +		this_tot->rx_packets	+= packets;
> +	}
> +}
> +
>  static void netvsc_get_stats64(struct net_device *net,
>  			       struct rtnl_link_stats64 *t)
>  {
> @@ -1202,6 +1262,23 @@ static const struct {
>  	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
>  	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
>  	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
> +}, pcpu_stats[] = {
> +	{ "cpu%u_rx_packets",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
> +	{ "cpu%u_rx_bytes",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
> +	{ "cpu%u_tx_packets",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
> +	{ "cpu%u_tx_bytes",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
> +	{ "cpu%u_vf_rx_packets",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
> +	{ "cpu%u_vf_rx_bytes",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
> +	{ "cpu%u_vf_tx_packets",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
> +	{ "cpu%u_vf_tx_bytes",
> +		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
>  }, vf_stats[] = {
>  	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
>  	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
> @@ -1213,6 +1290,9 @@ static const struct {
>  #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
>  #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
>  
> +/* statistics per queue (rx/tx packets/bytes) */
> +#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))

Even though Hyper-V/Azure does not support hot plug cpu's it might be better
to num_cpu_possible to avoid any possible future surprises.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 21:48 ` Stephen Hemminger
@ 2018-06-13 22:03   ` Yidong Ren
  2018-06-13 22:18     ` Stephen Hemminger
  0 siblings, 1 reply; 18+ messages in thread
From: Yidong Ren @ 2018-06-13 22:03 UTC (permalink / raw)
  To: Stephen Hemminger, Yidong Ren
  Cc: Stephen Hemminger, netdev, Haiyang Zhang, linux-kernel, devel,
	David S. Miller

> From: devel <driverdev-devel-bounces@linuxdriverproject.org> On Behalf
> Of Stephen Hemminger
> > +/* statistics per queue (rx/tx packets/bytes) */ #define
> > +NETVSC_PCPU_STATS_LEN (num_present_cpus() *
> ARRAY_SIZE(pcpu_stats))
> 
> Even though Hyper-V/Azure does not support hot plug cpu's it might be
> better to num_cpu_possible to avoid any possible future surprises.

That will create a very long output (num_cpu_possible = 128 on my machine) for ethtool,
While doesn't provide additional info.
num_present_cpus() would cause problem only if someone removed cpu 
between netvsc_get_sset_count() and netvsc_get_strings() and netvsc_get_ethtool_stats(). 

An alternative way could be: Check all stats, and only output if not zero. 
This need to be done in two pass. First pass to get the correct count, second pass to print the number.
Is there an elegant way to do this? 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 22:03   ` Yidong Ren
@ 2018-06-13 22:18     ` Stephen Hemminger
  0 siblings, 0 replies; 18+ messages in thread
From: Stephen Hemminger @ 2018-06-13 22:18 UTC (permalink / raw)
  To: Yidong Ren
  Cc: Yidong Ren, Stephen Hemminger, netdev, Haiyang Zhang,
	linux-kernel, devel, David S. Miller

On Wed, 13 Jun 2018 22:03:34 +0000
Yidong Ren <Yidong.Ren@microsoft.com> wrote:

> > From: devel <driverdev-devel-bounces@linuxdriverproject.org> On Behalf
> > Of Stephen Hemminger  
> > > +/* statistics per queue (rx/tx packets/bytes) */ #define
> > > +NETVSC_PCPU_STATS_LEN (num_present_cpus() *  
> > ARRAY_SIZE(pcpu_stats))
> > 
> > Even though Hyper-V/Azure does not support hot plug cpu's it might be
> > better to num_cpu_possible to avoid any possible future surprises.  
> 
> That will create a very long output (num_cpu_possible = 128 on my machine) for ethtool,
> While doesn't provide additional info.
> num_present_cpus() would cause problem only if someone removed cpu 
> between netvsc_get_sset_count() and netvsc_get_strings() and netvsc_get_ethtool_stats(). 
> 
> An alternative way could be: Check all stats, and only output if not zero. 
> This need to be done in two pass. First pass to get the correct count, second pass to print the number.
> Is there an elegant way to do this? 

Ok, but there is a race between getting names and getting statistics.
If a cpu was added/removed then statistics would not match.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-06-13 19:36 ` Yidong Ren
@ 2018-07-24  1:26   ` Yidong Ren
  -1 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-07-24  1:26 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel
  Cc: madhans, Yidong Ren

From: Yidong Ren <yidren@microsoft.com>

This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters already exist in current code. Exposing
these counters will help troubleshooting performance issues.

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes since v2:
 * Reimplemented with kvmalloc instead of alloc_percpu
 drivers/net/hyperv/hyperv_net.h |  11 ++++
 drivers/net/hyperv/netvsc_drv.c | 103 +++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 4b6e308199d2..a32ded5b4f41 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..51f9cab2da5b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			&pcpu_tot[nvchan->channel->target_cpu];
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,6 +1319,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_PCPU_STATS_LEN
 			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1269,6 +1349,18 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 
+	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
+			num_present_cpus(), GFP_KERNEL);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	kvfree(pcpu_sum);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1296,7 +1388,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1313,6 +1405,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-07-24  1:26   ` Yidong Ren
  0 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-07-24  1:26 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel
  Cc: madhans

From: Yidong Ren <yidren@microsoft.com>

This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters already exist in current code. Exposing
these counters will help troubleshooting performance issues.

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes since v2:
 * Reimplemented with kvmalloc instead of alloc_percpu
 drivers/net/hyperv/hyperv_net.h |  11 ++++
 drivers/net/hyperv/netvsc_drv.c | 103 +++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 4b6e308199d2..a32ded5b4f41 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..51f9cab2da5b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			&pcpu_tot[nvchan->channel->target_cpu];
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,6 +1319,7 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
+			+ NETVSC_PCPU_STATS_LEN
 			+ NETVSC_QUEUE_STATS_LEN(nvdev);
 	default:
 		return -EINVAL;
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1269,6 +1349,18 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
 		data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
 
+	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
+			num_present_cpus(), GFP_KERNEL);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	kvfree(pcpu_sum);
+
 	for (j = 0; j < nvdev->num_chn; j++) {
 		qstats = &nvdev->chan_table[j].tx_stats;
 
@@ -1296,7 +1388,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1313,6 +1405,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		for (i = 0; i < nvdev->num_chn; i++) {
 			sprintf(p, "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-24  1:26   ` Yidong Ren
  (?)
@ 2018-07-24  1:42   ` Yidong Ren
  2018-07-24 11:00       ` Vitaly Kuznetsov
  -1 siblings, 1 reply; 18+ messages in thread
From: Yidong Ren @ 2018-07-24  1:42 UTC (permalink / raw)
  To: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, David S. Miller,
	devel, netdev, linux-kernel
  Cc: Madhan Sivakumar

> From: Yidong Ren <yidren@linuxonhyperv.com>
> Sent: Monday, July 23, 2018 6:26 PM
> +	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
> +			num_present_cpus(), GFP_KERNEL);

Since there is no plan for CPU hotplug in Hyper-V in short term, it is fine 
to use num_present_cpus for now. We can move to debugfs later if necessary.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-24  1:42   ` Yidong Ren
@ 2018-07-24 11:00       ` Vitaly Kuznetsov
  0 siblings, 0 replies; 18+ messages in thread
From: Vitaly Kuznetsov @ 2018-07-24 11:00 UTC (permalink / raw)
  To: Yidong Ren
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, David S. Miller,
	devel, netdev, linux-kernel, Madhan Sivakumar

Yidong Ren <Yidong.Ren@microsoft.com> writes:

>> From: Yidong Ren <yidren@linuxonhyperv.com>
>> Sent: Monday, July 23, 2018 6:26 PM
>> +	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
>> +			num_present_cpus(), GFP_KERNEL);
>
> Since there is no plan for CPU hotplug in Hyper-V in short term, it is fine 
> to use num_present_cpus for now. We can move to debugfs later if necessary.

While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
inconsistent.

The allocation you're doing here is short-lived so I would suggest you
use possible_cpus everywhere. Even knowing there's no CPU hotplug on
Hyper-V at this moment, it can appear later and we'll get a hard-to-find
issue. Moreover, we may consider using netvsc driver on e.g. KVM with
Hyper-V enlightenments and KVM has CPU hotplug already.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-07-24 11:00       ` Vitaly Kuznetsov
  0 siblings, 0 replies; 18+ messages in thread
From: Vitaly Kuznetsov @ 2018-07-24 11:00 UTC (permalink / raw)
  To: Yidong Ren
  Cc: Stephen Hemminger, netdev, Haiyang Zhang, linux-kernel,
	Madhan Sivakumar, devel, David S. Miller

Yidong Ren <Yidong.Ren@microsoft.com> writes:

>> From: Yidong Ren <yidren@linuxonhyperv.com>
>> Sent: Monday, July 23, 2018 6:26 PM
>> +	pcpu_sum = kvmalloc(sizeof(struct netvsc_ethtool_pcpu_stats) *
>> +			num_present_cpus(), GFP_KERNEL);
>
> Since there is no plan for CPU hotplug in Hyper-V in short term, it is fine 
> to use num_present_cpus for now. We can move to debugfs later if necessary.

While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
inconsistent.

The allocation you're doing here is short-lived so I would suggest you
use possible_cpus everywhere. Even knowing there's no CPU hotplug on
Hyper-V at this moment, it can appear later and we'll get a hard-to-find
issue. Moreover, we may consider using netvsc driver on e.g. KVM with
Hyper-V enlightenments and KVM has CPU hotplug already.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-24 11:00       ` Vitaly Kuznetsov
@ 2018-07-25 22:54         ` Yidong Ren
  -1 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-07-25 22:54 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, David S. Miller,
	devel, netdev, linux-kernel, Madhan Sivakumar

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
> netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
> inconsistent.

I made a mistake there.  Thanks for catch me.
 
> The allocation you're doing here is short-lived so I would suggest you use
> possible_cpus everywhere. Even knowing there's no CPU hotplug on Hyper-
> V at this moment, it can appear later and we'll get a hard-to-find issue.
> Moreover, we may consider using netvsc driver on e.g. KVM with Hyper-V
> enlightenments and KVM has CPU hotplug already.

That would cause the output to be very long and useless.
I will submit a revision that allocates for num_possible_cpus, but only copy out stats for each present cpu.

-Yidong

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH v3] hv_netvsc: Add per-cpu ethtool stats for netvsc
@ 2018-07-25 22:54         ` Yidong Ren
  0 siblings, 0 replies; 18+ messages in thread
From: Yidong Ren @ 2018-07-25 22:54 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Stephen Hemminger, netdev, Haiyang Zhang, linux-kernel,
	Madhan Sivakumar, devel, David S. Miller

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> While you do for_each_present_cpu() in netvsc_get_ethtool_stats(),
> netvsc_get_pcpu_stats() does for_each_possible_cpu(). This looks
> inconsistent.

I made a mistake there.  Thanks for catch me.
 
> The allocation you're doing here is short-lived so I would suggest you use
> possible_cpus everywhere. Even knowing there's no CPU hotplug on Hyper-
> V at this moment, it can appear later and we'll get a hard-to-find issue.
> Moreover, we may consider using netvsc driver on e.g. KVM with Hyper-V
> enlightenments and KVM has CPU hotplug already.

That would cause the output to be very long and useless.
I will submit a revision that allocates for num_possible_cpus, but only copy out stats for each present cpu.

-Yidong

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v4 net-next] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-24  1:26   ` Yidong Ren
  (?)
  (?)
@ 2018-07-30 17:09   ` Yidong Ren
  2018-07-30 17:34     ` Stephen Hemminger
  2018-07-30 19:34     ` David Miller
  -1 siblings, 2 replies; 18+ messages in thread
From: Yidong Ren @ 2018-07-30 17:09 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel
  Cc: madhans, Yidong Ren

From: Yidong Ren <yidren@microsoft.com>

This patch implements following ethtool stats fields for netvsc:
cpu<n>_tx/rx_packets/bytes
cpu<n>_vf_tx/rx_packets/bytes

Corresponding per-cpu counters already exist in current code. Exposing
these counters will help troubleshooting performance issues.

for_each_present_cpu() was used instead of for_each_possible_cpu().
for_each_possible_cpu() would create very long and useless output.
It is still being used for internal buffer, but not for ethtool
output.

There could be an overflow if cpu was added between ethtool
call netvsc_get_sset_count() and netvsc_get_ethtool_stats() and
netvsc_get_strings(). (still safe if cpu was removed)
ethtool makes these three function calls separately.
As long as we use ethtool, I can't see any clean solution.

Currently and in foreseeable short term, Hyper-V doesn't support
cpu hot-plug. Plus, ethtool is for admin use. Unlikely the admin
would perform such combo operations.

Signed-off-by: Yidong Ren <yidren@microsoft.com>
---
Changes in v2:
  - Remove cpp style comment
  - Resubmit after freeze

Changes in v3:
  - Reimplemented with kvmalloc instead of alloc_percpu

Changes in v4:
  - Fixed inconsistent array size
  - Use kvmalloc_array instead of kvmalloc

 drivers/net/hyperv/hyperv_net.h |  11 ++++
 drivers/net/hyperv/netvsc_drv.c | 106 +++++++++++++++++++++++++++++++-
 2 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 4b6e308199d2..a32ded5b4f41 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
 	unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+	u64     vf_rx_packets;
+	u64     vf_rx_bytes;
+	u64     vf_tx_packets;
+	u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
 	u64     rx_packets;
 	u64     rx_bytes;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..805388bfbce7 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
 	}
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+				  struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+	int i;
+
+	/* fetch percpu stats of vf */
+	for_each_possible_cpu(i) {
+		const struct netvsc_vf_pcpu_stats *stats =
+			per_cpu_ptr(ndev_ctx->vf_stats, i);
+		struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			this_tot->vf_rx_packets = stats->rx_packets;
+			this_tot->vf_tx_packets = stats->tx_packets;
+			this_tot->vf_rx_bytes = stats->rx_bytes;
+			this_tot->vf_tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+		this_tot->rx_packets = this_tot->vf_rx_packets;
+		this_tot->tx_packets = this_tot->vf_tx_packets;
+		this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+		this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+	}
+
+	/* fetch percpu stats of netvsc */
+	for (i = 0; i < nvdev->num_chn; i++) {
+		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+		const struct netvsc_stats *stats;
+		struct netvsc_ethtool_pcpu_stats *this_tot =
+			&pcpu_tot[nvchan->channel->target_cpu];
+		u64 packets, bytes;
+		unsigned int start;
+
+		stats = &nvchan->tx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->tx_bytes	+= bytes;
+		this_tot->tx_packets	+= packets;
+
+		stats = &nvchan->rx_stats;
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		this_tot->rx_bytes	+= bytes;
+		this_tot->rx_packets	+= packets;
+	}
+}
+
 static void netvsc_get_stats64(struct net_device *net,
 			       struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
 	{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
 	{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
 	{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+	{ "cpu%u_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+	{ "cpu%u_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+	{ "cpu%u_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+	{ "cpu%u_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+	{ "cpu%u_vf_rx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+	{ "cpu%u_vf_rx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+	{ "cpu%u_vf_tx_packets",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+	{ "cpu%u_vf_tx_bytes",
+		offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
 	{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
 	{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN	ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN	ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,7 +1319,8 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 	case ETH_SS_STATS:
 		return NETVSC_GLOBAL_STATS_LEN
 			+ NETVSC_VF_STATS_LEN
-			+ NETVSC_QUEUE_STATS_LEN(nvdev);
+			+ NETVSC_QUEUE_STATS_LEN(nvdev)
+			+ NETVSC_PCPU_STATS_LEN;
 	default:
 		return -EINVAL;
 	}
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 	const void *nds = &ndc->eth_stats;
 	const struct netvsc_stats *qstats;
 	struct netvsc_vf_pcpu_stats sum;
+	struct netvsc_ethtool_pcpu_stats *pcpu_sum;
 	unsigned int start;
 	u64 packets, bytes;
-	int i, j;
+	int i, j, cpu;
 
 	if (!nvdev)
 		return;
@@ -1289,6 +1369,19 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 		data[i++] = packets;
 		data[i++] = bytes;
 	}
+
+	pcpu_sum = kvmalloc_array(num_possible_cpus(),
+				  sizeof(struct netvsc_ethtool_pcpu_stats),
+				  GFP_KERNEL);
+	netvsc_get_pcpu_stats(dev, pcpu_sum);
+	for_each_present_cpu(cpu) {
+		struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+		for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+			data[i++] = *(u64 *)((void *)this_sum
+					     + pcpu_stats[j].offset);
+	}
+	kvfree(pcpu_sum);
 }
 
 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -1296,7 +1389,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 	u8 *p = data;
-	int i;
+	int i, cpu;
 
 	if (!nvdev)
 		return;
@@ -1324,6 +1417,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 			p += ETH_GSTRING_LEN;
 		}
 
+		for_each_present_cpu(cpu) {
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+				sprintf(p, pcpu_stats[i].name, cpu);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+
 		break;
 	}
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 net-next] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-30 17:09   ` [PATCH v4 net-next] " Yidong Ren
@ 2018-07-30 17:34     ` Stephen Hemminger
  2018-07-30 19:34     ` David Miller
  1 sibling, 0 replies; 18+ messages in thread
From: Stephen Hemminger @ 2018-07-30 17:34 UTC (permalink / raw)
  To: Yidong Ren
  Cc: Yidong Ren, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
	David S. Miller, devel, netdev, linux-kernel, madhans

On Mon, 30 Jul 2018 17:09:45 +0000
Yidong Ren <yidren@linuxonhyperv.com> wrote:

> From: Yidong Ren <yidren@microsoft.com>
> 
> This patch implements following ethtool stats fields for netvsc:
> cpu<n>_tx/rx_packets/bytes
> cpu<n>_vf_tx/rx_packets/bytes
> 
> Corresponding per-cpu counters already exist in current code. Exposing
> these counters will help troubleshooting performance issues.
> 
> for_each_present_cpu() was used instead of for_each_possible_cpu().
> for_each_possible_cpu() would create very long and useless output.
> It is still being used for internal buffer, but not for ethtool
> output.
> 
> There could be an overflow if cpu was added between ethtool
> call netvsc_get_sset_count() and netvsc_get_ethtool_stats() and
> netvsc_get_strings(). (still safe if cpu was removed)
> ethtool makes these three function calls separately.
> As long as we use ethtool, I can't see any clean solution.
> 
> Currently and in foreseeable short term, Hyper-V doesn't support
> cpu hot-plug. Plus, ethtool is for admin use. Unlikely the admin
> would perform such combo operations.
> 
> Signed-off-by: Yidong Ren <yidren@microsoft.com>

Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4 net-next] hv_netvsc: Add per-cpu ethtool stats for netvsc
  2018-07-30 17:09   ` [PATCH v4 net-next] " Yidong Ren
  2018-07-30 17:34     ` Stephen Hemminger
@ 2018-07-30 19:34     ` David Miller
  1 sibling, 0 replies; 18+ messages in thread
From: David Miller @ 2018-07-30 19:34 UTC (permalink / raw)
  To: yidren, yidren
  Cc: kys, haiyangz, sthemmin, devel, netdev, linux-kernel, madhans

From: Yidong Ren <yidren@linuxonhyperv.com>
Date: Mon, 30 Jul 2018 17:09:45 +0000

> From: Yidong Ren <yidren@microsoft.com>
> 
> This patch implements following ethtool stats fields for netvsc:
> cpu<n>_tx/rx_packets/bytes
> cpu<n>_vf_tx/rx_packets/bytes
> 
> Corresponding per-cpu counters already exist in current code. Exposing
> these counters will help troubleshooting performance issues.
> 
> for_each_present_cpu() was used instead of for_each_possible_cpu().
> for_each_possible_cpu() would create very long and useless output.
> It is still being used for internal buffer, but not for ethtool
> output.
> 
> There could be an overflow if cpu was added between ethtool
> call netvsc_get_sset_count() and netvsc_get_ethtool_stats() and
> netvsc_get_strings(). (still safe if cpu was removed)
> ethtool makes these three function calls separately.
> As long as we use ethtool, I can't see any clean solution.
> 
> Currently and in foreseeable short term, Hyper-V doesn't support
> cpu hot-plug. Plus, ethtool is for admin use. Unlikely the admin
> would perform such combo operations.
> 
> Signed-off-by: Yidong Ren <yidren@microsoft.com>

Applied, thank you.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-07-30 19:34 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-13 19:36 [PATCH v2] hv_netvsc: Add per-cpu ethtool stats for netvsc Yidong Ren
2018-06-13 19:36 ` Yidong Ren
2018-06-13 20:57 ` Eric Dumazet
2018-06-13 20:57   ` Eric Dumazet
2018-06-13 21:07   ` Yidong Ren
2018-06-13 21:48 ` Stephen Hemminger
2018-06-13 22:03   ` Yidong Ren
2018-06-13 22:18     ` Stephen Hemminger
2018-07-24  1:26 ` [PATCH v3] " Yidong Ren
2018-07-24  1:26   ` Yidong Ren
2018-07-24  1:42   ` Yidong Ren
2018-07-24 11:00     ` Vitaly Kuznetsov
2018-07-24 11:00       ` Vitaly Kuznetsov
2018-07-25 22:54       ` Yidong Ren
2018-07-25 22:54         ` Yidong Ren
2018-07-30 17:09   ` [PATCH v4 net-next] " Yidong Ren
2018-07-30 17:34     ` Stephen Hemminger
2018-07-30 19:34     ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.