* [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data
@ 2015-05-12 22:50 sixiao
2015-05-14 4:52 ` David Miller
0 siblings, 1 reply; 4+ messages in thread
From: sixiao @ 2015-05-12 22:50 UTC (permalink / raw)
To: kys, haiyangz, devel, netdev, linux-kernel; +Cc: Simon Xiao
From: Simon Xiao <sixiao@microsoft.com>
Current code does not lock anything when calculating the TX and RX stats.
As a result, the RX and TX data reported by ifconfig are not accurate in a
system with high network throughput and multiple CPUs (in my test,
RX/TX = 83% between 2 HyperV VM nodes which have 8 vCPUs and 40G Ethernet).
This patch fixes the above issue by using per_cpu stats.
netvsc_get_stats64() summarizes TX and RX data by iterating over all CPUs
to get their respective stats.
Signed-off-by: Simon Xiao <sixiao@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/net/hyperv/hyperv_net.h | 9 +++++
drivers/net/hyperv/netvsc_drv.c | 80 ++++++++++++++++++++++++++++++++++++-----
2 files changed, 81 insertions(+), 8 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 41071d3..5a92b36 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -611,6 +611,12 @@ struct multi_send_data {
u32 count; /* counter of batched packets */
};
+struct netvsc_stats {
+ u64 packets;
+ u64 bytes;
+ struct u64_stats_sync s_sync;
+};
+
/* The context of the netvsc device */
struct net_device_context {
/* point back to our device context */
@@ -618,6 +624,9 @@ struct net_device_context {
struct delayed_work dwork;
struct work_struct work;
u32 msg_enable; /* debug level */
+
+ struct netvsc_stats __percpu *tx_stats;
+ struct netvsc_stats __percpu *rx_stats;
};
/* Per netvsc device */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5993c7e..310b902 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
u32 skb_length;
u32 pkt_sz;
struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
-
+ struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
/* We will atmost need two pages to describe the rndis
* header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -580,8 +580,10 @@ do_send:
drop:
if (ret == 0) {
- net->stats.tx_bytes += skb_length;
- net->stats.tx_packets++;
+ u64_stats_update_begin(&tx_stats->s_sync);
+ tx_stats->packets++;
+ tx_stats->bytes += skb_length;
+ u64_stats_update_end(&tx_stats->s_sync);
} else {
if (ret != -EAGAIN) {
dev_kfree_skb_any(skb);
@@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
struct ndis_tcp_ip_checksum_info *csum_info)
{
struct net_device *net;
+ struct net_device_context *net_device_ctx;
struct sk_buff *skb;
+ struct netvsc_stats *rx_stats;
net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
if (!net || net->reg_state != NETREG_REGISTERED) {
packet->status = NVSP_STAT_FAIL;
return 0;
}
+ net_device_ctx = netdev_priv(net);
+ rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
/* Allocate a skb - TODO direct I/O to pages? */
skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
@@ -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
skb_record_rx_queue(skb, packet->channel->
offermsg.offer.sub_channel_index);
- net->stats.rx_packets++;
- net->stats.rx_bytes += packet->total_data_buflen;
+ u64_stats_update_begin(&rx_stats->s_sync);
+ rx_stats->packets++;
+ rx_stats->bytes += packet->total_data_buflen;
+ u64_stats_update_end(&rx_stats->s_sync);
/*
* Pass the skb back up. Network stack will deallocate the skb when it
@@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
return 0;
}
+static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
+ struct rtnl_link_stats64 *t)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
+ cpu);
+ struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
+ cpu);
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tx_stats->s_sync);
+ tx_packets = tx_stats->packets;
+ tx_bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->s_sync, start));
+
+ do {
+ start = u64_stats_fetch_begin_irq(&rx_stats->s_sync);
+ rx_packets = rx_stats->packets;
+ rx_bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->s_sync, start));
+
+ t->tx_bytes += tx_bytes;
+ t->tx_packets += tx_packets;
+ t->rx_bytes += rx_bytes;
+ t->rx_packets += rx_packets;
+ }
+
+ t->tx_dropped = net->stats.tx_dropped;
+ t->tx_errors = net->stats.tx_errors;
+
+ t->rx_dropped = net->stats.rx_dropped;
+ t->rx_errors = net->stats.rx_errors;
+
+ return t;
+}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
@@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
+ .ndo_get_stats64 = netvsc_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = netvsc_poll_controller,
#endif
@@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w)
netdev_notify_peers(net);
}
+static void netvsc_free_netdev(struct net_device *netdev)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(netdev);
+
+ free_percpu(net_device_ctx->tx_stats);
+ free_percpu(net_device_ctx->rx_stats);
+ free_netdev(netdev);
+}
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
@@ -883,6 +940,13 @@ static int netvsc_probe(struct hv_device *dev,
netdev_dbg(net, "netvsc msg_enable: %d\n",
net_device_ctx->msg_enable);
+ net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+ if (!net_device_ctx->tx_stats)
+ return -ENOMEM;
+ net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
+ if (!net_device_ctx->rx_stats)
+ return -ENOMEM;
+
hv_set_drvdata(dev, net);
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
@@ -909,7 +973,7 @@ static int netvsc_probe(struct hv_device *dev,
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- free_netdev(net);
+ netvsc_free_netdev(net);
hv_set_drvdata(dev, NULL);
return ret;
}
@@ -923,7 +987,7 @@ static int netvsc_probe(struct hv_device *dev,
if (ret != 0) {
pr_err("Unable to register netdev.\n");
rndis_filter_device_remove(dev);
- free_netdev(net);
+ netvsc_free_netdev(net);
} else {
schedule_delayed_work(&net_device_ctx->dwork, 0);
}
@@ -962,7 +1026,7 @@ static int netvsc_remove(struct hv_device *dev)
*/
rndis_filter_device_remove(dev);
- free_netdev(net);
+ netvsc_free_netdev(net);
return 0;
}
--
1.8.5.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data
2015-05-12 22:50 [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data sixiao
@ 2015-05-14 4:52 ` David Miller
2015-05-14 6:02 ` Simon Xiao
0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2015-05-14 4:52 UTC (permalink / raw)
To: sixiao; +Cc: kys, haiyangz, devel, netdev, linux-kernel
From: sixiao@microsoft.com
Date: Tue, 12 May 2015 15:50:02 -0700
> From: Simon Xiao <sixiao@microsoft.com>
>
> Current code does not lock anything when calculating the TX and RX stats.
> As a result, the RX and TX data reported by ifconfig are not accuracy in a
> system with high network throughput and multiple CPUs (in my test,
> RX/TX = 83% between 2 HyperV VM nodes which have 8 vCPUs and 40G Ethernet).
>
> This patch fixed the above issue by using per_cpu stats.
> netvsc_get_stats64() summarizes TX and RX data by iterating over all CPUs
> to get their respective stats.
>
> Signed-off-by: Simon Xiao <sixiao@microsoft.com>
> Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> ---
> drivers/net/hyperv/hyperv_net.h | 9 +++++
> drivers/net/hyperv/netvsc_drv.c | 80 ++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 81 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 41071d3..5a92b36 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -611,6 +611,12 @@ struct multi_send_data {
> u32 count; /* counter of batched packets */
> };
>
> +struct netvsc_stats {
> + u64 packets;
> + u64 bytes;
> + struct u64_stats_sync s_sync;
> +};
> +
> /* The context of the netvsc device */
> struct net_device_context {
> /* point back to our device context */
> @@ -618,6 +624,9 @@ struct net_device_context {
> struct delayed_work dwork;
> struct work_struct work;
> u32 msg_enable; /* debug level */
> +
> + struct netvsc_stats __percpu *tx_stats;
> + struct netvsc_stats __percpu *rx_stats;
> };
>
> /* Per netvsc device */
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index 5993c7e..310b902 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
> u32 skb_length;
> u32 pkt_sz;
> struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
> -
> + struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
>
> /* We will atmost need two pages to describe the rndis
> * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
> @@ -580,8 +580,10 @@ do_send:
>
> drop:
> if (ret == 0) {
> - net->stats.tx_bytes += skb_length;
> - net->stats.tx_packets++;
> + u64_stats_update_begin(&tx_stats->s_sync);
> + tx_stats->packets++;
> + tx_stats->bytes += skb_length;
> + u64_stats_update_end(&tx_stats->s_sync);
> } else {
> if (ret != -EAGAIN) {
> dev_kfree_skb_any(skb);
> @@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
> struct ndis_tcp_ip_checksum_info *csum_info)
> {
> struct net_device *net;
> + struct net_device_context *net_device_ctx;
> struct sk_buff *skb;
> + struct netvsc_stats *rx_stats;
>
> net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
> if (!net || net->reg_state != NETREG_REGISTERED) {
> packet->status = NVSP_STAT_FAIL;
> return 0;
> }
> + net_device_ctx = netdev_priv(net);
> + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
>
> /* Allocate a skb - TODO direct I/O to pages? */
> skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
> @@ -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
> skb_record_rx_queue(skb, packet->channel->
> offermsg.offer.sub_channel_index);
>
> - net->stats.rx_packets++;
> - net->stats.rx_bytes += packet->total_data_buflen;
> + u64_stats_update_begin(&rx_stats->s_sync);
> + rx_stats->packets++;
> + rx_stats->bytes += packet->total_data_buflen;
> + u64_stats_update_end(&rx_stats->s_sync);
>
> /*
> * Pass the skb back up. Network stack will deallocate the skb when it
> @@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
> return 0;
> }
>
> +static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
> + struct rtnl_link_stats64 *t)
> +{
> + struct net_device_context *ndev_ctx = netdev_priv(net);
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
> + cpu);
> + struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
> + cpu);
> + u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
> + unsigned int start;
> +
> + do {
> + start = u64_stats_fetch_begin_irq(&tx_stats->s_sync);
> + tx_packets = tx_stats->packets;
> + tx_bytes = tx_stats->bytes;
> + } while (u64_stats_fetch_retry_irq(&tx_stats->s_sync, start));
> +
> + do {
> + start = u64_stats_fetch_begin_irq(&rx_stats->s_sync);
> + rx_packets = rx_stats->packets;
> + rx_bytes = rx_stats->bytes;
> + } while (u64_stats_fetch_retry_irq(&rx_stats->s_sync, start));
> +
> + t->tx_bytes += tx_bytes;
> + t->tx_packets += tx_packets;
> + t->rx_bytes += rx_bytes;
> + t->rx_packets += rx_packets;
> + }
> +
> + t->tx_dropped = net->stats.tx_dropped;
> + t->tx_errors = net->stats.tx_dropped;
> +
> + t->rx_dropped = net->stats.rx_dropped;
> + t->rx_errors = net->stats.rx_errors;
> +
> + return t;
> +}
>
> static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
> {
> @@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = {
> .ndo_validate_addr = eth_validate_addr,
> .ndo_set_mac_address = netvsc_set_mac_addr,
> .ndo_select_queue = netvsc_select_queue,
> + .ndo_get_stats64 = netvsc_get_stats64,
> #ifdef CONFIG_NET_POLL_CONTROLLER
> .ndo_poll_controller = netvsc_poll_controller,
> #endif
> @@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w)
> netdev_notify_peers(net);
> }
>
> +static void netvsc_free_netdev(struct net_device *netdev)
> +{
> + struct net_device_context *net_device_ctx = netdev_priv(netdev);
> +
> + free_percpu(net_device_ctx->tx_stats);
> + free_percpu(net_device_ctx->rx_stats);
> + free_netdev(netdev);
> +}
>
> static int netvsc_probe(struct hv_device *dev,
> const struct hv_vmbus_device_id *dev_id)
> @@ -883,6 +940,13 @@ static int netvsc_probe(struct hv_device *dev,
> netdev_dbg(net, "netvsc msg_enable: %d\n",
> net_device_ctx->msg_enable);
>
> + net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
> + if (!net_device_ctx->tx_stats)
> + return -ENOMEM;
> + net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
> + if (!net_device_ctx->rx_stats)
> + return -ENOMEM;
Both of these return statements leak.
The first one leaks the net_device, the second one leaks the net_device
and the ->tx_stats memory.
You have to have cleanup paths here.
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data
2015-05-14 4:52 ` David Miller
@ 2015-05-14 6:02 ` Simon Xiao
0 siblings, 0 replies; 4+ messages in thread
From: Simon Xiao @ 2015-05-14 6:02 UTC (permalink / raw)
To: David Miller; +Cc: KY Srinivasan, Haiyang Zhang, devel, netdev, linux-kernel
> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Wednesday, May 13, 2015 9:52 PM
> To: Simon Xiao
> Cc: KY Srinivasan; Haiyang Zhang; devel@linuxdriverproject.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate
> TX/RX data
>
> From: sixiao@microsoft.com
> Date: Tue, 12 May 2015 15:50:02 -0700
>
> > From: Simon Xiao <sixiao@microsoft.com>
> >
> > Current code does not lock anything when calculating the TX and RX stats.
> > As a result, the RX and TX data reported by ifconfig are not accuracy
> > in a system with high network throughput and multiple CPUs (in my
> > test, RX/TX = 83% between 2 HyperV VM nodes which have 8 vCPUs and 40G
> Ethernet).
> >
> > This patch fixed the above issue by using per_cpu stats.
> > netvsc_get_stats64() summarizes TX and RX data by iterating over all
> > CPUs to get their respective stats.
> >
> > Signed-off-by: Simon Xiao <sixiao@microsoft.com>
> > Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
> > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> > ---
> > drivers/net/hyperv/hyperv_net.h | 9 +++++
> > drivers/net/hyperv/netvsc_drv.c | 80
> > ++++++++++++++++++++++++++++++++++++-----
> > 2 files changed, 81 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/hyperv/hyperv_net.h
> > b/drivers/net/hyperv/hyperv_net.h index 41071d3..5a92b36 100644
> > --- a/drivers/net/hyperv/hyperv_net.h
> > +++ b/drivers/net/hyperv/hyperv_net.h
> > @@ -611,6 +611,12 @@ struct multi_send_data {
> > u32 count; /* counter of batched packets */ };
> >
> > +struct netvsc_stats {
> > + u64 packets;
> > + u64 bytes;
> > + struct u64_stats_sync s_sync;
> > +};
> > +
> > /* The context of the netvsc device */ struct net_device_context {
> > /* point back to our device context */ @@ -618,6 +624,9 @@ struct
> > net_device_context {
> > struct delayed_work dwork;
> > struct work_struct work;
> > u32 msg_enable; /* debug level */
> > +
> > + struct netvsc_stats __percpu *tx_stats;
> > + struct netvsc_stats __percpu *rx_stats;
> > };
> >
> > /* Per netvsc device */
> > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > b/drivers/net/hyperv/netvsc_drv.c index 5993c7e..310b902 100644
> > --- a/drivers/net/hyperv/netvsc_drv.c
> > +++ b/drivers/net/hyperv/netvsc_drv.c
> > @@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct
> net_device *net)
> > u32 skb_length;
> > u32 pkt_sz;
> > struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
> > -
> > + struct netvsc_stats *tx_stats =
> > +this_cpu_ptr(net_device_ctx->tx_stats);
> >
> > /* We will atmost need two pages to describe the rndis
> > * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
> @@
> > -580,8 +580,10 @@ do_send:
> >
> > drop:
> > if (ret == 0) {
> > - net->stats.tx_bytes += skb_length;
> > - net->stats.tx_packets++;
> > + u64_stats_update_begin(&tx_stats->s_sync);
> > + tx_stats->packets++;
> > + tx_stats->bytes += skb_length;
> > + u64_stats_update_end(&tx_stats->s_sync);
> > } else {
> > if (ret != -EAGAIN) {
> > dev_kfree_skb_any(skb);
> > @@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device
> *device_obj,
> > struct ndis_tcp_ip_checksum_info *csum_info)
> {
> > struct net_device *net;
> > + struct net_device_context *net_device_ctx;
> > struct sk_buff *skb;
> > + struct netvsc_stats *rx_stats;
> >
> > net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
> > if (!net || net->reg_state != NETREG_REGISTERED) {
> > packet->status = NVSP_STAT_FAIL;
> > return 0;
> > }
> > + net_device_ctx = netdev_priv(net);
> > + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
> >
> > /* Allocate a skb - TODO direct I/O to pages? */
> > skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); @@
> > -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
> > skb_record_rx_queue(skb, packet->channel->
> > offermsg.offer.sub_channel_index);
> >
> > - net->stats.rx_packets++;
> > - net->stats.rx_bytes += packet->total_data_buflen;
> > + u64_stats_update_begin(&rx_stats->s_sync);
> > + rx_stats->packets++;
> > + rx_stats->bytes += packet->total_data_buflen;
> > + u64_stats_update_end(&rx_stats->s_sync);
> >
> > /*
> > * Pass the skb back up. Network stack will deallocate the skb when
> > it @@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device
> *ndev, int mtu)
> > return 0;
> > }
> >
> > +static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
> > + struct rtnl_link_stats64 *t) {
> > + struct net_device_context *ndev_ctx = netdev_priv(net);
> > + int cpu;
> > +
> > + for_each_possible_cpu(cpu) {
> > + struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
> > + cpu);
> > + struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
> > + cpu);
> > + u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
> > + unsigned int start;
> > +
> > + do {
> > + start = u64_stats_fetch_begin_irq(&tx_stats->s_sync);
> > + tx_packets = tx_stats->packets;
> > + tx_bytes = tx_stats->bytes;
> > + } while (u64_stats_fetch_retry_irq(&tx_stats->s_sync, start));
> > +
> > + do {
> > + start = u64_stats_fetch_begin_irq(&rx_stats->s_sync);
> > + rx_packets = rx_stats->packets;
> > + rx_bytes = rx_stats->bytes;
> > + } while (u64_stats_fetch_retry_irq(&rx_stats->s_sync, start));
> > +
> > + t->tx_bytes += tx_bytes;
> > + t->tx_packets += tx_packets;
> > + t->rx_bytes += rx_bytes;
> > + t->rx_packets += rx_packets;
> > + }
> > +
> > + t->tx_dropped = net->stats.tx_dropped;
> > + t->tx_errors = net->stats.tx_dropped;
> > +
> > + t->rx_dropped = net->stats.rx_dropped;
> > + t->rx_errors = net->stats.rx_errors;
> > +
> > + return t;
> > +}
> >
> > static int netvsc_set_mac_addr(struct net_device *ndev, void *p) {
> > @@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = {
> > .ndo_validate_addr = eth_validate_addr,
> > .ndo_set_mac_address = netvsc_set_mac_addr,
> > .ndo_select_queue = netvsc_select_queue,
> > + .ndo_get_stats64 = netvsc_get_stats64,
> > #ifdef CONFIG_NET_POLL_CONTROLLER
> > .ndo_poll_controller = netvsc_poll_controller,
> > #endif
> > @@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w)
> > netdev_notify_peers(net);
> > }
> >
> > +static void netvsc_free_netdev(struct net_device *netdev) {
> > + struct net_device_context *net_device_ctx = netdev_priv(netdev);
> > +
> > + free_percpu(net_device_ctx->tx_stats);
> > + free_percpu(net_device_ctx->rx_stats);
> > + free_netdev(netdev);
> > +}
> >
> > static int netvsc_probe(struct hv_device *dev,
> > const struct hv_vmbus_device_id *dev_id) @@ -883,6
> +940,13 @@
> > static int netvsc_probe(struct hv_device *dev,
> > netdev_dbg(net, "netvsc msg_enable: %d\n",
> > net_device_ctx->msg_enable);
> >
> > + net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > + if (!net_device_ctx->tx_stats)
> > + return -ENOMEM;
> > + net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > + if (!net_device_ctx->rx_stats)
> > + return -ENOMEM;
>
> Both of these return statements leak.
>
> The first one leaks the net_device, the second one leaks the net_device and the -
> >tx_stats memory.
>
> You have to have cleanup paths here.
Thanks David. I will fix this and submit the new patch again.
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data
@ 2015-05-14 6:02 ` Simon Xiao
0 siblings, 0 replies; 4+ messages in thread
From: Simon Xiao @ 2015-05-14 6:02 UTC (permalink / raw)
To: David Miller; +Cc: devel, Haiyang Zhang, linux-kernel, netdev
> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Wednesday, May 13, 2015 9:52 PM
> To: Simon Xiao
> Cc: KY Srinivasan; Haiyang Zhang; devel@linuxdriverproject.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate
> TX/RX data
>
> From: sixiao@microsoft.com
> Date: Tue, 12 May 2015 15:50:02 -0700
>
> > From: Simon Xiao <sixiao@microsoft.com>
> >
> > Current code does not lock anything when calculating the TX and RX stats.
> > As a result, the RX and TX data reported by ifconfig are not accuracy
> > in a system with high network throughput and multiple CPUs (in my
> > test, RX/TX = 83% between 2 HyperV VM nodes which have 8 vCPUs and 40G
> Ethernet).
> >
> > This patch fixed the above issue by using per_cpu stats.
> > netvsc_get_stats64() summarizes TX and RX data by iterating over all
> > CPUs to get their respective stats.
> >
> > Signed-off-by: Simon Xiao <sixiao@microsoft.com>
> > Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
> > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> > ---
> > drivers/net/hyperv/hyperv_net.h | 9 +++++
> > drivers/net/hyperv/netvsc_drv.c | 80
> > ++++++++++++++++++++++++++++++++++++-----
> > 2 files changed, 81 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/hyperv/hyperv_net.h
> > b/drivers/net/hyperv/hyperv_net.h index 41071d3..5a92b36 100644
> > --- a/drivers/net/hyperv/hyperv_net.h
> > +++ b/drivers/net/hyperv/hyperv_net.h
> > @@ -611,6 +611,12 @@ struct multi_send_data {
> > u32 count; /* counter of batched packets */ };
> >
> > +struct netvsc_stats {
> > + u64 packets;
> > + u64 bytes;
> > + struct u64_stats_sync s_sync;
> > +};
> > +
> > /* The context of the netvsc device */ struct net_device_context {
> > /* point back to our device context */ @@ -618,6 +624,9 @@ struct
> > net_device_context {
> > struct delayed_work dwork;
> > struct work_struct work;
> > u32 msg_enable; /* debug level */
> > +
> > + struct netvsc_stats __percpu *tx_stats;
> > + struct netvsc_stats __percpu *rx_stats;
> > };
> >
> > /* Per netvsc device */
> > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > b/drivers/net/hyperv/netvsc_drv.c index 5993c7e..310b902 100644
> > --- a/drivers/net/hyperv/netvsc_drv.c
> > +++ b/drivers/net/hyperv/netvsc_drv.c
> > @@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct
> net_device *net)
> > u32 skb_length;
> > u32 pkt_sz;
> > struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
> > -
> > + struct netvsc_stats *tx_stats =
> > +this_cpu_ptr(net_device_ctx->tx_stats);
> >
> > /* We will atmost need two pages to describe the rndis
> > * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
> @@
> > -580,8 +580,10 @@ do_send:
> >
> > drop:
> > if (ret == 0) {
> > - net->stats.tx_bytes += skb_length;
> > - net->stats.tx_packets++;
> > + u64_stats_update_begin(&tx_stats->s_sync);
> > + tx_stats->packets++;
> > + tx_stats->bytes += skb_length;
> > + u64_stats_update_end(&tx_stats->s_sync);
> > } else {
> > if (ret != -EAGAIN) {
> > dev_kfree_skb_any(skb);
> > @@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device
> *device_obj,
> > struct ndis_tcp_ip_checksum_info *csum_info)
> {
> > struct net_device *net;
> > + struct net_device_context *net_device_ctx;
> > struct sk_buff *skb;
> > + struct netvsc_stats *rx_stats;
> >
> > net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
> > if (!net || net->reg_state != NETREG_REGISTERED) {
> > packet->status = NVSP_STAT_FAIL;
> > return 0;
> > }
> > + net_device_ctx = netdev_priv(net);
> > + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
> >
> > /* Allocate a skb - TODO direct I/O to pages? */
> > skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); @@
> > -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
> > skb_record_rx_queue(skb, packet->channel->
> > offermsg.offer.sub_channel_index);
> >
> > - net->stats.rx_packets++;
> > - net->stats.rx_bytes += packet->total_data_buflen;
> > + u64_stats_update_begin(&rx_stats->s_sync);
> > + rx_stats->packets++;
> > + rx_stats->bytes += packet->total_data_buflen;
> > + u64_stats_update_end(&rx_stats->s_sync);
> >
> > /*
> > * Pass the skb back up. Network stack will deallocate the skb when
> > it @@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device
> *ndev, int mtu)
> > return 0;
> > }
> >
> > +static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
> > + struct rtnl_link_stats64 *t) {
> > + struct net_device_context *ndev_ctx = netdev_priv(net);
> > + int cpu;
> > +
> > + for_each_possible_cpu(cpu) {
> > + struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
> > + cpu);
> > + struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
> > + cpu);
> > + u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
> > + unsigned int start;
> > +
> > + do {
> > + start = u64_stats_fetch_begin_irq(&tx_stats->s_sync);
> > + tx_packets = tx_stats->packets;
> > + tx_bytes = tx_stats->bytes;
> > + } while (u64_stats_fetch_retry_irq(&tx_stats->s_sync, start));
> > +
> > + do {
> > + start = u64_stats_fetch_begin_irq(&rx_stats->s_sync);
> > + rx_packets = rx_stats->packets;
> > + rx_bytes = rx_stats->bytes;
> > + } while (u64_stats_fetch_retry_irq(&rx_stats->s_sync, start));
> > +
> > + t->tx_bytes += tx_bytes;
> > + t->tx_packets += tx_packets;
> > + t->rx_bytes += rx_bytes;
> > + t->rx_packets += rx_packets;
> > + }
> > +
> > + t->tx_dropped = net->stats.tx_dropped;
> > + t->tx_errors = net->stats.tx_dropped;
> > +
> > + t->rx_dropped = net->stats.rx_dropped;
> > + t->rx_errors = net->stats.rx_errors;
> > +
> > + return t;
> > +}
> >
> > static int netvsc_set_mac_addr(struct net_device *ndev, void *p) {
> > @@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = {
> > .ndo_validate_addr = eth_validate_addr,
> > .ndo_set_mac_address = netvsc_set_mac_addr,
> > .ndo_select_queue = netvsc_select_queue,
> > + .ndo_get_stats64 = netvsc_get_stats64,
> > #ifdef CONFIG_NET_POLL_CONTROLLER
> > .ndo_poll_controller = netvsc_poll_controller,
> > #endif
> > @@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w)
> > netdev_notify_peers(net);
> > }
> >
> > +static void netvsc_free_netdev(struct net_device *netdev) {
> > + struct net_device_context *net_device_ctx = netdev_priv(netdev);
> > +
> > + free_percpu(net_device_ctx->tx_stats);
> > + free_percpu(net_device_ctx->rx_stats);
> > + free_netdev(netdev);
> > +}
> >
> > static int netvsc_probe(struct hv_device *dev,
> > const struct hv_vmbus_device_id *dev_id) @@ -883,6
> +940,13 @@
> > static int netvsc_probe(struct hv_device *dev,
> > netdev_dbg(net, "netvsc msg_enable: %d\n",
> > net_device_ctx->msg_enable);
> >
> > + net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > + if (!net_device_ctx->tx_stats)
> > + return -ENOMEM;
> > + net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > + if (!net_device_ctx->rx_stats)
> > + return -ENOMEM;
>
> Both of these return statements leak.
>
> The first one leaks the net_device, the second one leaks the net_device and the -
> >tx_stats memory.
>
> You have to have cleanup paths here.
Thanks David. I will fix this and submit the new patch again.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-05-14 6:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-12 22:50 [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate TX/RX data sixiao
2015-05-14 4:52 ` David Miller
2015-05-14 6:02 ` Simon Xiao
2015-05-14 6:02 ` Simon Xiao
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.