All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net 0/4] hv_netvsc: fixes for VF removal path
@ 2016-08-11 10:58 ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

Kernel crash is reported after VF is removed and detached from netvsc
device. My investigation led me to PATCH2 of this series but PATCH1 is
required to support the change. I also noticed a couple of other issues
while debugging and I fix them with PATCH3 and PATCH4.

Please review.

Vitaly Kuznetsov (4):
  hv_netvsc: don't lose VF information
  hv_netvsc: reset vf_inject on VF removal
  hv_netvsc: protect module refcount by checking
    net_device_ctx->vf_netdev
  hv_netvsc: avoid deadlocks between rtnl lock and
    netvsc_inject_disable()

 drivers/net/hyperv/hyperv_net.h | 24 ++++-------
 drivers/net/hyperv/netvsc.c     | 19 ++++----
 drivers/net/hyperv/netvsc_drv.c | 96 ++++++++++++++++++-----------------------
 3 files changed, 59 insertions(+), 80 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH net 0/4] hv_netvsc: fixes for VF removal path
@ 2016-08-11 10:58 ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, Haiyang Zhang, linux-kernel

Kernel crash is reported after VF is removed and detached from netvsc
device. My investigation led me to PATCH2 of this series but PATCH1 is
required to support the change. I also noticed a couple of other issues
while debugging and I fix them with PATCH3 and PATCH4.

Please review.

Vitaly Kuznetsov (4):
  hv_netvsc: don't lose VF information
  hv_netvsc: reset vf_inject on VF removal
  hv_netvsc: protect module refcount by checking
    net_device_ctx->vf_netdev
  hv_netvsc: avoid deadlocks between rtnl lock and
    netvsc_inject_disable()

 drivers/net/hyperv/hyperv_net.h | 24 ++++-------
 drivers/net/hyperv/netvsc.c     | 19 ++++----
 drivers/net/hyperv/netvsc_drv.c | 96 ++++++++++++++++++-----------------------
 3 files changed, 59 insertions(+), 80 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH net 1/4] hv_netvsc: don't lose VF information
  2016-08-11 10:58 ` Vitaly Kuznetsov
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  -1 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

struct netvsc_device is not suitable for storing VF information as this
structure is being destroyed on MTU change / set channel operation (see
rndis_filter_device_remove()). Move all VF related stuff to struct
net_device_context which is persistent.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/hyperv_net.h | 19 ++++++++--------
 drivers/net/hyperv/netvsc.c     | 19 +++++++---------
 drivers/net/hyperv/netvsc_drv.c | 49 +++++++++++++++++++++++------------------
 3 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 467fb8b..3b3ecf2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -647,7 +647,7 @@ struct netvsc_reconfig {
 struct garp_wrk {
 	struct work_struct dwrk;
 	struct net_device *netdev;
-	struct netvsc_device *netvsc_dev;
+	struct net_device_context *net_device_ctx;
 };
 
 /* The context of the netvsc device  */
@@ -678,6 +678,15 @@ struct net_device_context {
 
 	/* the device is going away */
 	bool start_remove;
+
+	/* State to manage the associated VF interface. */
+	struct net_device *vf_netdev;
+	bool vf_inject;
+	atomic_t vf_use_cnt;
+	/* 1: allocated, serial number is valid. 0: not allocated */
+	u32 vf_alloc;
+	/* Serial number of the VF to team with */
+	u32 vf_serial;
 };
 
 /* Per netvsc device */
@@ -733,15 +742,7 @@ struct netvsc_device {
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-	/* 1: allocated, serial number is valid. 0: not allocated */
-	u32 vf_alloc;
-	/* Serial number of the VF to team with */
-	u32 vf_serial;
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 20e0917..410fb8e8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void)
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
-
 	return net_device;
 }
 
@@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev,
 		nvscdev->send_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
 			   struct nvsp_message *nvmsg)
 {
-	nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-	nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
 static inline void netvsc_receive_inband(struct hv_device *hdev,
-					 struct netvsc_device *nvdev,
-					 struct nvsp_message *nvmsg)
+				 struct net_device_context *net_device_ctx,
+				 struct nvsp_message *nvmsg)
 {
 	switch (nvmsg->hdr.msg_type) {
 	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
 		break;
 
 	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-		netvsc_send_vf(nvdev, nvmsg);
+		netvsc_send_vf(net_device_ctx, nvmsg);
 		break;
 	}
 }
@@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 				   struct vmpacket_descriptor *desc)
 {
 	struct nvsp_message *nvmsg;
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
 	nvmsg = (struct nvsp_message *)((unsigned long)
 		desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 		break;
 
 	case VM_PKT_DATA_INBAND:
-		netvsc_receive_inband(device, net_device, nvmsg);
+		netvsc_receive_inband(device, net_device_ctx, nvmsg);
 		break;
 
 	default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 41bd952..794139b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	struct sk_buff *skb;
 	struct sk_buff *vf_skb;
 	struct netvsc_stats *rx_stats;
-	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
 	u32 bytes_recvd = packet->total_data_buflen;
 	int ret = 0;
 
 	if (!net || net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
+	if (READ_ONCE(net_device_ctx->vf_inject)) {
+		atomic_inc(&net_device_ctx->vf_use_cnt);
+		if (!READ_ONCE(net_device_ctx->vf_inject)) {
 			/*
 			 * We raced; just move on.
 			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
+			atomic_dec(&net_device_ctx->vf_use_cnt);
 			goto vf_injection_done;
 		}
 
@@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 		 * the host). Deliver these via the VF interface
 		 * in the guest.
 		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
+		vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+					       packet, csum_info, *data,
+					       vlan_tci);
 		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+			++net_device_ctx->vf_netdev->stats.rx_packets;
+			net_device_ctx->vf_netdev->stats.rx_bytes +=
+				bytes_recvd;
 			netif_receive_skb(vf_skb);
 		} else {
 			++net->stats.rx_dropped;
 			ret = NVSP_STAT_FAIL;
 		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
+		atomic_dec(&net_device_ctx->vf_use_cnt);
 		return ret;
 	}
 
@@ -1158,7 +1159,7 @@ static void netvsc_notify_peers(struct work_struct *wrk)
 
 	netdev_notify_peers(gwrk->netdev);
 
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
+	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
 }
 
 static struct net_device *get_netvsc_net_device(char *mac)
@@ -1211,7 +1212,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	 * Take a reference on the module.
 	 */
 	try_module_get(THIS_MODULE);
-	netvsc_dev->vf_netdev = vf_netdev;
+	net_device_ctx->vf_netdev = vf_netdev;
 	return NOTIFY_OK;
 }
 
@@ -1233,11 +1234,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
+	net_device_ctx->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1257,9 +1258,9 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	 * notify peers; take a reference to prevent
 	 * the VF interface from vanishing.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1283,17 +1284,17 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
+	net_device_ctx->vf_inject = false;
 	/*
 	 * Wait for currently active users to
 	 * drain out.
 	 */
 
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
+	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
 		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
@@ -1302,9 +1303,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	/*
 	 * Notify peers.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1331,7 +1332,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	net_device_ctx->vf_netdev = NULL;
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
 }
@@ -1382,6 +1383,10 @@ static int netvsc_probe(struct hv_device *dev,
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 
+	atomic_set(&net_device_ctx->vf_use_cnt, 0);
+	net_device_ctx->vf_netdev = NULL;
+	net_device_ctx->vf_inject = false;
+
 	net->netdev_ops = &device_ops;
 
 	net->hw_features = NETVSC_HW_FEATURES;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 1/4] hv_netvsc: don't lose VF information
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, Haiyang Zhang, linux-kernel

struct netvsc_device is not suitable for storing VF information as this
structure is being destroyed on MTU change / set channel operation (see
rndis_filter_device_remove()). Move all VF related stuff to struct
net_device_context which is persistent.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/hyperv_net.h | 19 ++++++++--------
 drivers/net/hyperv/netvsc.c     | 19 +++++++---------
 drivers/net/hyperv/netvsc_drv.c | 49 +++++++++++++++++++++++------------------
 3 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 467fb8b..3b3ecf2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -647,7 +647,7 @@ struct netvsc_reconfig {
 struct garp_wrk {
 	struct work_struct dwrk;
 	struct net_device *netdev;
-	struct netvsc_device *netvsc_dev;
+	struct net_device_context *net_device_ctx;
 };
 
 /* The context of the netvsc device  */
@@ -678,6 +678,15 @@ struct net_device_context {
 
 	/* the device is going away */
 	bool start_remove;
+
+	/* State to manage the associated VF interface. */
+	struct net_device *vf_netdev;
+	bool vf_inject;
+	atomic_t vf_use_cnt;
+	/* 1: allocated, serial number is valid. 0: not allocated */
+	u32 vf_alloc;
+	/* Serial number of the VF to team with */
+	u32 vf_serial;
 };
 
 /* Per netvsc device */
@@ -733,15 +742,7 @@ struct netvsc_device {
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-	/* 1: allocated, serial number is valid. 0: not allocated */
-	u32 vf_alloc;
-	/* Serial number of the VF to team with */
-	u32 vf_serial;
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 20e0917..410fb8e8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void)
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
-
 	return net_device;
 }
 
@@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev,
 		nvscdev->send_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
 			   struct nvsp_message *nvmsg)
 {
-	nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-	nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
 static inline void netvsc_receive_inband(struct hv_device *hdev,
-					 struct netvsc_device *nvdev,
-					 struct nvsp_message *nvmsg)
+				 struct net_device_context *net_device_ctx,
+				 struct nvsp_message *nvmsg)
 {
 	switch (nvmsg->hdr.msg_type) {
 	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
 		break;
 
 	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-		netvsc_send_vf(nvdev, nvmsg);
+		netvsc_send_vf(net_device_ctx, nvmsg);
 		break;
 	}
 }
@@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 				   struct vmpacket_descriptor *desc)
 {
 	struct nvsp_message *nvmsg;
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
 	nvmsg = (struct nvsp_message *)((unsigned long)
 		desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
 		break;
 
 	case VM_PKT_DATA_INBAND:
-		netvsc_receive_inband(device, net_device, nvmsg);
+		netvsc_receive_inband(device, net_device_ctx, nvmsg);
 		break;
 
 	default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 41bd952..794139b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 	struct sk_buff *skb;
 	struct sk_buff *vf_skb;
 	struct netvsc_stats *rx_stats;
-	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
 	u32 bytes_recvd = packet->total_data_buflen;
 	int ret = 0;
 
 	if (!net || net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
+	if (READ_ONCE(net_device_ctx->vf_inject)) {
+		atomic_inc(&net_device_ctx->vf_use_cnt);
+		if (!READ_ONCE(net_device_ctx->vf_inject)) {
 			/*
 			 * We raced; just move on.
 			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
+			atomic_dec(&net_device_ctx->vf_use_cnt);
 			goto vf_injection_done;
 		}
 
@@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 		 * the host). Deliver these via the VF interface
 		 * in the guest.
 		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
+		vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+					       packet, csum_info, *data,
+					       vlan_tci);
 		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+			++net_device_ctx->vf_netdev->stats.rx_packets;
+			net_device_ctx->vf_netdev->stats.rx_bytes +=
+				bytes_recvd;
 			netif_receive_skb(vf_skb);
 		} else {
 			++net->stats.rx_dropped;
 			ret = NVSP_STAT_FAIL;
 		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
+		atomic_dec(&net_device_ctx->vf_use_cnt);
 		return ret;
 	}
 
@@ -1158,7 +1159,7 @@ static void netvsc_notify_peers(struct work_struct *wrk)
 
 	netdev_notify_peers(gwrk->netdev);
 
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
+	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
 }
 
 static struct net_device *get_netvsc_net_device(char *mac)
@@ -1211,7 +1212,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	 * Take a reference on the module.
 	 */
 	try_module_get(THIS_MODULE);
-	netvsc_dev->vf_netdev = vf_netdev;
+	net_device_ctx->vf_netdev = vf_netdev;
 	return NOTIFY_OK;
 }
 
@@ -1233,11 +1234,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
+	net_device_ctx->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1257,9 +1258,9 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 	 * notify peers; take a reference to prevent
 	 * the VF interface from vanishing.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1283,17 +1284,17 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
+	net_device_ctx->vf_inject = false;
 	/*
 	 * Wait for currently active users to
 	 * drain out.
 	 */
 
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
+	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
 		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
@@ -1302,9 +1303,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	/*
 	 * Notify peers.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	atomic_inc(&net_device_ctx->vf_use_cnt);
 	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
+	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
 
 	return NOTIFY_OK;
@@ -1331,7 +1332,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	net_device_ctx->vf_netdev = NULL;
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
 }
@@ -1382,6 +1383,10 @@ static int netvsc_probe(struct hv_device *dev,
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 
+	atomic_set(&net_device_ctx->vf_use_cnt, 0);
+	net_device_ctx->vf_netdev = NULL;
+	net_device_ctx->vf_inject = false;
+
 	net->netdev_ops = &device_ops;
 
 	net->hw_features = NETVSC_HW_FEATURES;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 10:58 ` Vitaly Kuznetsov
  (?)
  (?)
@ 2016-08-11 10:58 ` Vitaly Kuznetsov
  2016-08-11 11:46   ` Yuval Mintz
                     ` (3 more replies)
  -1 siblings, 4 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
vf_netdev is already NULL and we're trying to inject packets into NULL
net device in netvsc_recv_callback() causing kernel to crash.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/netvsc_drv.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 794139b..b3c31e3 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1216,6 +1216,19 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	return NOTIFY_OK;
 }
 
+static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
+{
+	net_device_ctx->vf_inject = true;
+}
+
+static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
+{
+	net_device_ctx->vf_inject = false;
+
+	/* Wait for currently active users to drain out. */
+	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
+		udelay(50);
+}
 
 static int netvsc_vf_up(struct net_device *vf_netdev)
 {
@@ -1238,7 +1251,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	net_device_ctx->vf_inject = true;
+	netvsc_inject_enable(net_device_ctx);
 
 	/*
 	 * Open the device before switching data path.
@@ -1288,14 +1301,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	net_device_ctx->vf_inject = false;
-	/*
-	 * Wait for currently active users to
-	 * drain out.
-	 */
-
-	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
-		udelay(50);
+	netvsc_inject_disable(net_device_ctx);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
@@ -1331,7 +1337,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 	if (netvsc_dev == NULL)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-
+	netvsc_inject_disable(net_device_ctx);
 	net_device_ctx->vf_netdev = NULL;
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 3/4] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev
  2016-08-11 10:58 ` Vitaly Kuznetsov
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  -1 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER notifications
only once per VF but we increase/decrease module refcount unconditionally.
Check vf_netdev to make sure we don't take/release it twice. We presume
that only one VF per netvsc device may exist.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/netvsc_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index b3c31e3..874829a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1204,7 +1204,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1334,7 +1334,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 	netvsc_inject_disable(net_device_ctx);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 3/4] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, Haiyang Zhang, linux-kernel

We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER notifications
only once per VF but we increase/decrease module refcount unconditionally.
Check vf_netdev to make sure we don't take/release it twice. We presume
that only one VF per netvsc device may exist.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/netvsc_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index b3c31e3..874829a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1204,7 +1204,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1334,7 +1334,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || !net_device_ctx->vf_netdev)
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 	netvsc_inject_disable(net_device_ctx);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and netvsc_inject_disable()
  2016-08-11 10:58 ` Vitaly Kuznetsov
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  -1 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

Here is a deadlock scenario:
- netvsc_vf_up() schedules netvsc_notify_peers() work and quits.
- netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it
  is being executed from netdev notifier chain we hold rtnl lock when we
  get here.
- we enter netvsc_inject_disable() and loop and wait till
  netvsc_notify_peers() drops vf_use_cnt.
- netvsc_notify_peers() starts on some other CPU but netdev_notify_peers()
  will hang on rtnl_lock().
- deadlock!

Similar deadlocks are possible between netvsc_vf_{up,down}() and
netvsc_unregister_vf() as it also waits till vf_use_cnt drops to zero.
Instead of introducing additional synchronization I suggest we drop
gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're
acting under rtnl lock this is legitimate.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/hyperv_net.h |  7 -------
 drivers/net/hyperv/netvsc_drv.c | 33 +++++----------------------------
 2 files changed, 5 insertions(+), 35 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3b3ecf2..591af71 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -644,12 +644,6 @@ struct netvsc_reconfig {
 	u32 event;
 };
 
-struct garp_wrk {
-	struct work_struct dwrk;
-	struct net_device *netdev;
-	struct net_device_context *net_device_ctx;
-};
-
 /* The context of the netvsc device  */
 struct net_device_context {
 	/* point back to our device context */
@@ -667,7 +661,6 @@ struct net_device_context {
 
 	struct work_struct work;
 	u32 msg_enable; /* debug level */
-	struct garp_wrk gwrk;
 
 	struct netvsc_stats __percpu *tx_stats;
 	struct netvsc_stats __percpu *rx_stats;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 874829a..62a4e6e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1151,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev)
 	free_netdev(netdev);
 }
 
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
-	struct garp_wrk *gwrk;
-
-	gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
-}
-
 static struct net_device *get_netvsc_net_device(char *mac)
 {
 	struct net_device *dev, *found = NULL;
@@ -1266,15 +1255,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 
 	netif_carrier_off(ndev);
 
-	/*
-	 * Now notify peers. We are scheduling work to
-	 * notify peers; take a reference to prevent
-	 * the VF interface from vanishing.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+	/* Now notify peers through VF device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 
 	return NOTIFY_OK;
 }
@@ -1306,13 +1288,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
 	netif_carrier_on(ndev);
-	/*
-	 * Notify peers.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+
+	/* Now notify peers through netvsc device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
 
 	return NOTIFY_OK;
 }
@@ -1384,7 +1362,6 @@ static int netvsc_probe(struct hv_device *dev,
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
 	INIT_WORK(&net_device_ctx->work, do_set_multicast);
-	INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and netvsc_inject_disable()
@ 2016-08-11 10:58   ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 10:58 UTC (permalink / raw)
  To: netdev; +Cc: devel, Haiyang Zhang, linux-kernel

Here is a deadlock scenario:
- netvsc_vf_up() schedules netvsc_notify_peers() work and quits.
- netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it
  is being executed from netdev notifier chain we hold rtnl lock when we
  get here.
- we enter netvsc_inject_disable() and loop and wait till
  netvsc_notify_peers() drops vf_use_cnt.
- netvsc_notify_peers() starts on some other CPU but netdev_notify_peers()
  will hang on rtnl_lock().
- deadlock!

Similar deadlocks are possible between netvsc_vf_{up,down}() and
netvsc_unregister_vf() as it also waits till vf_use_cnt drops to zero.
Instead of introducing additional synchronization I suggest we drop
gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're
acting under rtnl lock this is legitimate.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/hyperv_net.h |  7 -------
 drivers/net/hyperv/netvsc_drv.c | 33 +++++----------------------------
 2 files changed, 5 insertions(+), 35 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3b3ecf2..591af71 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -644,12 +644,6 @@ struct netvsc_reconfig {
 	u32 event;
 };
 
-struct garp_wrk {
-	struct work_struct dwrk;
-	struct net_device *netdev;
-	struct net_device_context *net_device_ctx;
-};
-
 /* The context of the netvsc device  */
 struct net_device_context {
 	/* point back to our device context */
@@ -667,7 +661,6 @@ struct net_device_context {
 
 	struct work_struct work;
 	u32 msg_enable; /* debug level */
-	struct garp_wrk gwrk;
 
 	struct netvsc_stats __percpu *tx_stats;
 	struct netvsc_stats __percpu *rx_stats;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 874829a..62a4e6e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1151,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev)
 	free_netdev(netdev);
 }
 
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
-	struct garp_wrk *gwrk;
-
-	gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->net_device_ctx->vf_use_cnt);
-}
-
 static struct net_device *get_netvsc_net_device(char *mac)
 {
 	struct net_device *dev, *found = NULL;
@@ -1266,15 +1255,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
 
 	netif_carrier_off(ndev);
 
-	/*
-	 * Now notify peers. We are scheduling work to
-	 * notify peers; take a reference to prevent
-	 * the VF interface from vanishing.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+	/* Now notify peers through VF device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 
 	return NOTIFY_OK;
 }
@@ -1306,13 +1288,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
 	netif_carrier_on(ndev);
-	/*
-	 * Notify peers.
-	 */
-	atomic_inc(&net_device_ctx->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.net_device_ctx = net_device_ctx;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+
+	/* Now notify peers through netvsc device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
 
 	return NOTIFY_OK;
 }
@@ -1384,7 +1362,6 @@ static int netvsc_probe(struct hv_device *dev,
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
 	INIT_WORK(&net_device_ctx->work, do_set_multicast);
-	INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* RE: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 10:58 ` [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal Vitaly Kuznetsov
@ 2016-08-11 11:46   ` Yuval Mintz
  2016-08-11 12:09       ` Vitaly Kuznetsov
  2016-08-11 15:38     ` Haiyang Zhang
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Yuval Mintz @ 2016-08-11 11:46 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev
  Cc: devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

> +static void netvsc_inject_enable(struct net_device_context
> +*net_device_ctx) {
> +	net_device_ctx->vf_inject = true;
> +}
> +
> +static void netvsc_inject_disable(struct net_device_context
> +*net_device_ctx) {
> +	net_device_ctx->vf_inject = false;
> +
> +	/* Wait for currently active users to drain out. */
> +	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
> +		udelay(50);
> +}

That was already the behavior before, but are you certain you
want to unconditionally block without any possible timeout?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 11:46   ` Yuval Mintz
@ 2016-08-11 12:09       ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 12:09 UTC (permalink / raw)
  To: Yuval Mintz; +Cc: netdev, devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

Yuval Mintz <Yuval.Mintz@qlogic.com> writes:

>> +static void netvsc_inject_enable(struct net_device_context
>> +*net_device_ctx) {
>> +	net_device_ctx->vf_inject = true;
>> +}
>> +
>> +static void netvsc_inject_disable(struct net_device_context
>> +*net_device_ctx) {
>> +	net_device_ctx->vf_inject = false;
>> +
>> +	/* Wait for currently active users to drain out. */
>> +	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
>> +		udelay(50);
>> +}
>
> That was already the behavior before, but are you certain you
> want to unconditionally block without any possible timeout?

Yes, this is OK. After PATCH4 of this series there is only one place
which takes the vf_use_cnt (netvsc_recv_callback()) and it is an
interrupt handler, there are no sleepable operations there.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
@ 2016-08-11 12:09       ` Vitaly Kuznetsov
  0 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-11 12:09 UTC (permalink / raw)
  To: Yuval Mintz; +Cc: netdev, Haiyang Zhang, linux-kernel, devel

Yuval Mintz <Yuval.Mintz@qlogic.com> writes:

>> +static void netvsc_inject_enable(struct net_device_context
>> +*net_device_ctx) {
>> +	net_device_ctx->vf_inject = true;
>> +}
>> +
>> +static void netvsc_inject_disable(struct net_device_context
>> +*net_device_ctx) {
>> +	net_device_ctx->vf_inject = false;
>> +
>> +	/* Wait for currently active users to drain out. */
>> +	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
>> +		udelay(50);
>> +}
>
> That was already the behavior before, but are you certain you
> want to unconditionally block without any possible timeout?

Yes, this is OK. After PATCH4 of this series there is only one place
which takes the vf_use_cnt (netvsc_recv_callback()) and it is an
interrupt handler, there are no sleepable operations there.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 1/4] hv_netvsc: don't lose VF information
  2016-08-11 10:58   ` Vitaly Kuznetsov
@ 2016-08-11 15:38     ` Haiyang Zhang
  -1 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel, KY Srinivasan



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 1/4] hv_netvsc: don't lose VF information
> 
> struct netvsc_device is not suitable for storing VF information as this
> structure is being destroyed on MTU change / set channel operation (see
> rndis_filter_device_remove()). Move all VF related stuff to struct
> net_device_context which is persistent.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 1/4] hv_netvsc: don't lose VF information
@ 2016-08-11 15:38     ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 1/4] hv_netvsc: don't lose VF information
> 
> struct netvsc_device is not suitable for storing VF information as this
> structure is being destroyed on MTU change / set channel operation (see
> rndis_filter_device_remove()). Move all VF related stuff to struct
> net_device_context which is persistent.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 3/4] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev
  2016-08-11 10:58   ` Vitaly Kuznetsov
@ 2016-08-11 15:38     ` Haiyang Zhang
  -1 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel, KY Srinivasan



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 3/4] hv_netvsc: protect module refcount by checking
> net_device_ctx->vf_netdev
> 
> We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER
> notifications
> only once per VF but we increase/decrease module refcount unconditionally.
> Check vf_netdev to make sure we don't take/release it twice. We presume
> that only one VF per netvsc device may exist.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 10:58 ` [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal Vitaly Kuznetsov
@ 2016-08-11 15:38     ` Haiyang Zhang
  2016-08-11 15:38     ` Haiyang Zhang
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel, KY Srinivasan



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
> 
> We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
> VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
> vf_netdev is already NULL and we're trying to inject packets into NULL
> net device in netvsc_recv_callback() causing kernel to crash.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
@ 2016-08-11 15:38     ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
> 
> We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
> VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
> vf_netdev is already NULL and we're trying to inject packets into NULL
> net device in netvsc_recv_callback() causing kernel to crash.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 3/4] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev
@ 2016-08-11 15:38     ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 3/4] hv_netvsc: protect module refcount by checking
> net_device_ctx->vf_netdev
> 
> We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER
> notifications
> only once per VF but we increase/decrease module refcount unconditionally.
> Check vf_netdev to make sure we don't take/release it twice. We presume
> that only one VF per netvsc device may exist.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and netvsc_inject_disable()
  2016-08-11 10:58   ` Vitaly Kuznetsov
@ 2016-08-11 15:38     ` Haiyang Zhang
  -1 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel, KY Srinivasan



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and
> netvsc_inject_disable()
> 
> Here is a deadlock scenario:
> - netvsc_vf_up() schedules netvsc_notify_peers() work and quits.
> - netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it
>   is being executed from netdev notifier chain we hold rtnl lock when we
>   get here.
> - we enter netvsc_inject_disable() and loop and wait till
>   netvsc_notify_peers() drops vf_use_cnt.
> - netvsc_notify_peers() starts on some other CPU but netdev_notify_peers()
>   will hang on rtnl_lock().
> - deadlock!
> 
> Similar deadlocks are possible between netvsc_vf_{up,down}() and
> netvsc_unregister_vf() as it also waits till vf_use_cnt drops to zero.
> Instead of introducing additional synchronization I suggest we drop
> gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're
> acting under rtnl lock this is legitimate.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and netvsc_inject_disable()
@ 2016-08-11 15:38     ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2016-08-11 15:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov, netdev; +Cc: devel, linux-kernel



> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Thursday, August 11, 2016 6:59 AM
> To: netdev@vger.kernel.org
> Cc: devel@linuxdriverproject.org; linux-kernel@vger.kernel.org; Haiyang
> Zhang <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>
> Subject: [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and
> netvsc_inject_disable()
> 
> Here is a deadlock scenario:
> - netvsc_vf_up() schedules netvsc_notify_peers() work and quits.
> - netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it
>   is being executed from netdev notifier chain we hold rtnl lock when we
>   get here.
> - we enter netvsc_inject_disable() and loop and wait till
>   netvsc_notify_peers() drops vf_use_cnt.
> - netvsc_notify_peers() starts on some other CPU but netdev_notify_peers()
>   will hang on rtnl_lock().
> - deadlock!
> 
> Similar deadlocks are possible between netvsc_vf_{up,down}() and
> netvsc_unregister_vf() as it also waits till vf_use_cnt drops to zero.
> Instead of introducing additional synchronization I suggest we drop
> gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're
> acting under rtnl lock this is legitimate.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Acked-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 12:09       ` Vitaly Kuznetsov
@ 2016-08-12 14:47         ` Stephen Hemminger
  -1 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-12 14:47 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: Yuval Mintz, netdev, Haiyang Zhang, linux-kernel, devel

On Thu, 11 Aug 2016 14:09:53 +0200
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Yuval Mintz <Yuval.Mintz@qlogic.com> writes:
> 
> >> +static void netvsc_inject_enable(struct net_device_context
> >> +*net_device_ctx) {
> >> +	net_device_ctx->vf_inject = true;
> >> +}
> >> +
> >> +static void netvsc_inject_disable(struct net_device_context
> >> +*net_device_ctx) {
> >> +	net_device_ctx->vf_inject = false;
> >> +
> >> +	/* Wait for currently active users to drain out. */
> >> +	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
> >> +		udelay(50);
> >> +}  
> >
> > That was already the behavior before, but are you certain you
> > want to unconditionally block without any possible timeout?  
> 
> Yes, this is OK. After PATCH4 of this series there is only one place
> which takes the vf_use_cnt (netvsc_recv_callback()) and it is an
> interrupt handler, there are no sleepable operations there.
> 

Since network devices are protected by RCU, it looks like the refcount
is not necessary.  I think vf_inject flag and vf_use_cnt could just be replaced
by doing RCU on vf_netdev.

The callback is invoked from tasklet (softirq) context.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
@ 2016-08-12 14:47         ` Stephen Hemminger
  0 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-12 14:47 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: netdev, Haiyang Zhang, Yuval Mintz, linux-kernel, devel

On Thu, 11 Aug 2016 14:09:53 +0200
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Yuval Mintz <Yuval.Mintz@qlogic.com> writes:
> 
> >> +static void netvsc_inject_enable(struct net_device_context
> >> +*net_device_ctx) {
> >> +	net_device_ctx->vf_inject = true;
> >> +}
> >> +
> >> +static void netvsc_inject_disable(struct net_device_context
> >> +*net_device_ctx) {
> >> +	net_device_ctx->vf_inject = false;
> >> +
> >> +	/* Wait for currently active users to drain out. */
> >> +	while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
> >> +		udelay(50);
> >> +}  
> >
> > That was already the behavior before, but are you certain you
> > want to unconditionally block without any possible timeout?  
> 
> Yes, this is OK. After PATCH4 of this series there is only one place
> which takes the vf_use_cnt (netvsc_recv_callback()) and it is an
> interrupt handler, there are no sleepable operations there.
> 

Since network devices are protected by RCU, it looks like the refcount
is not necessary.  I think vf_inject flag and vf_use_cnt could just be replaced
by doing RCU on vf_netdev.

The callback is invoked from tasklet (softirq) context.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
  2016-08-11 10:58 ` [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal Vitaly Kuznetsov
@ 2016-08-13  3:40     ` David Miller
  2016-08-11 15:38     ` Haiyang Zhang
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 31+ messages in thread
From: David Miller @ 2016-08-13  3:40 UTC (permalink / raw)
  To: vkuznets; +Cc: netdev, devel, linux-kernel, haiyangz, kys

From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 11 Aug 2016 12:58:55 +0200

> We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
> VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
> vf_netdev is already NULL and we're trying to inject packets into NULL
> net device in netvsc_recv_callback() causing kernel to crash.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

You can't create a blocking operation problem knowingly in this
patch just because you fix it in patch #4.

You must order your patches such that the driver does not regress
at any intermediate stage of your patch series.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal
@ 2016-08-13  3:40     ` David Miller
  0 siblings, 0 replies; 31+ messages in thread
From: David Miller @ 2016-08-13  3:40 UTC (permalink / raw)
  To: vkuznets; +Cc: netdev, haiyangz, linux-kernel, devel

From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 11 Aug 2016 12:58:55 +0200

> We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on
> VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while
> vf_netdev is already NULL and we're trying to inject packets into NULL
> net device in netvsc_recv_callback() causing kernel to crash.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

You can't create a blocking operation problem knowingly in this
patch just because you fix it in patch #4.

You must order your patches such that the driver does not regress
at any intermediate stage of your patch series.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC 1/2] netvsc: reference counting fix
  2016-08-11 10:58 ` [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal Vitaly Kuznetsov
@ 2016-08-13 18:35     ` Stephen Hemminger
  2016-08-11 15:38     ` Haiyang Zhang
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-13 18:35 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: netdev, devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

This is how I think it should be fixed, but not tested yet.

Subjec: netvsc: use device not module reference counts

Fix how the cross-device reference counting is handled.  When VF is
associated with the synthetic interface, the VF driver module should
still be able to be unloaded.  The module unload code will callback
with NETDEV_UNREGISTER event which breaks the connection safely.
(Fixes 9f4b5ba5db4 hv_netvsc: Implement support for VF drivers on Hyper-V)

Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>


--- a/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:40.243995863 -0700
+++ b/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:40.239995844 -0700
@@ -1220,10 +1220,8 @@ static int netvsc_register_vf(struct net
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
-	/*
-	 * Take a reference on the module.
-	 */
-	try_module_get(THIS_MODULE);
+
+	dev_hold(vf_netdev);
 	netvsc_dev->vf_netdev = vf_netdev;
 	return NOTIFY_OK;
 }
@@ -1345,7 +1343,7 @@ static int netvsc_unregister_vf(struct n
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
 	netvsc_dev->vf_netdev = NULL;
-	module_put(THIS_MODULE);
+	dev_put(vf_netdev);
 	return NOTIFY_OK;
 }
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC 1/2] netvsc: reference counting fix
@ 2016-08-13 18:35     ` Stephen Hemminger
  0 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-13 18:35 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: netdev, Haiyang Zhang, linux-kernel, devel

This is how I think it should be fixed, but not tested yet.

Subjec: netvsc: use device not module reference counts

Fix how the cross-device reference counting is handled.  When VF is
associated with the synthetic interface, the VF driver module should
still be able to be unloaded.  The module unload code will callback
with NETDEV_UNREGISTER event which breaks the connection safely.
(Fixes 9f4b5ba5db4 hv_netvsc: Implement support for VF drivers on Hyper-V)

Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>


--- a/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:40.243995863 -0700
+++ b/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:40.239995844 -0700
@@ -1220,10 +1220,8 @@ static int netvsc_register_vf(struct net
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
-	/*
-	 * Take a reference on the module.
-	 */
-	try_module_get(THIS_MODULE);
+
+	dev_hold(vf_netdev);
 	netvsc_dev->vf_netdev = vf_netdev;
 	return NOTIFY_OK;
 }
@@ -1345,7 +1343,7 @@ static int netvsc_unregister_vf(struct n
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
 	netvsc_dev->vf_netdev = NULL;
-	module_put(THIS_MODULE);
+	dev_put(vf_netdev);
 	return NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC 2/2] netvsc: use RCU for VF net device reference
  2016-08-13 18:35     ` Stephen Hemminger
@ 2016-08-13 18:38       ` Stephen Hemminger
  -1 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-13 18:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: netdev, devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan


Rather than keeping a pointer, a flag, and reference count, use RCU and existing
device reference count to protect the synthetic to VF relationship.

One other change is that injected packets must be accounted for on the synthetic
device otherwise the statistics will be lost. The VF device driver (for most devices)
creates the statistics based on device registers and therefore would ignore any direct
manipulation of network device stats.

Also, rx_dropped is not atomic_long.

Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>


--- a/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.736085464 -0700
@@ -689,6 +689,9 @@ struct netvsc_device {
 	wait_queue_head_t wait_drain;
 	bool destroy;
 
+	/* State to manage the associated VF interface. */
+	struct net_device *vf_netdev __rcu;
+
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
 	u32 recv_buf_size;
@@ -739,10 +742,6 @@ struct netvsc_device {
 	/* Serial number of the VF to team with */
 	u32 vf_serial;
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
--- a/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.736085464 -0700
@@ -77,13 +77,10 @@ static struct netvsc_device *alloc_net_d
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
+
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
-
 	return net_device;
 }
 
--- a/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:31:47.733685146 -0700
@@ -668,59 +668,45 @@ int netvsc_recv_callback(struct hv_devic
 {
 	struct net_device *net = hv_get_drvdata(device_obj);
 	struct net_device_context *net_device_ctx = netdev_priv(net);
-	struct sk_buff *skb;
-	struct sk_buff *vf_skb;
-	struct netvsc_stats *rx_stats;
+	struct netvsc_stats *rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
 	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
-	u32 bytes_recvd = packet->total_data_buflen;
-	int ret = 0;
+	struct net_device *vf_netdev;
+	struct sk_buff *skb;
 
 	if (!net || net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
-			/*
-			 * We raced; just move on.
-			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
-			goto vf_injection_done;
-		}
+	vf_netdev = rcu_dereference(netvsc_dev->vf_netdev);
+	if (vf_netdev) {
+		/* Inject this packet into the VF interface.  On
+		 * Hyper-V, multicast and broadcast packets are only
+		 * delivered on the synthetic interface (after
+		 * subjecting these to policy filters on the
+		 * host). Deliver these via the VF interface in the
+		 * guest if up, otherwise drop.
+		 */
+		if (!netif_running(vf_netdev))
+			goto drop;
 
-		/*
-		 * Inject this packet into the VF inerface.
-		 * On Hyper-V, multicast and brodcast packets
-		 * are only delivered on the synthetic interface
-		 * (after subjecting these to policy filters on
-		 * the host). Deliver these via the VF interface
-		 * in the guest.
+		/* Account for this on the synthetic interface
+		 * otherwise likely to be not accounted for since
+		 * device statistics on the VF are driver dependent.
 		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
-		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
-			netif_receive_skb(vf_skb);
-		} else {
-			++net->stats.rx_dropped;
-			ret = NVSP_STAT_FAIL;
-		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
-		return ret;
+		++net->stats.multicast;
+		net = vf_netdev;
 	}
 
-vf_injection_done:
-	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
-
 	/* Allocate a skb - TODO direct I/O to pages? */
 	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
 	if (unlikely(!skb)) {
+drop:
 		++net->stats.rx_dropped;
 		return NVSP_STAT_FAIL;
 	}
-	skb_record_rx_queue(skb, channel->
-			    offermsg.offer.sub_channel_index);
+
+	if (likely(!vf_netdev))
+		skb_record_rx_queue(skb,
+				    channel->offermsg.offer.sub_channel_index);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
@@ -989,6 +975,7 @@ static struct rtnl_link_stats64 *netvsc_
 
 	t->rx_dropped	= net->stats.rx_dropped;
 	t->rx_errors	= net->stats.rx_errors;
+	t->multicast 	= net->stats.multicast;
 
 	return t;
 }
@@ -1170,8 +1157,7 @@ static void netvsc_notify_peers(struct w
 	gwrk = container_of(wrk, struct garp_wrk, dwrk);
 
 	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
+	dev_put(gwrk->netdev);
 }
 
 static struct net_device *get_netvsc_net_device(char *mac)
@@ -1222,7 +1208,7 @@ static int netvsc_register_vf(struct net
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
 
 	dev_hold(vf_netdev);
-	netvsc_dev->vf_netdev = vf_netdev;
+	rcu_assign_pointer(netvsc_dev->vf_netdev, vf_netdev);
 	return NOTIFY_OK;
 }
 
@@ -1248,7 +1234,6 @@ static int netvsc_vf_up(struct net_devic
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1268,7 +1253,7 @@ static int netvsc_vf_up(struct net_devic
 	 * notify peers; take a reference to prevent
 	 * the VF interface from vanishing.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	dev_hold(vf_netdev);
 	net_device_ctx->gwrk.netdev = vf_netdev;
 	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
@@ -1298,14 +1283,7 @@ static int netvsc_vf_down(struct net_dev
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
-	/*
-	 * Wait for currently active users to
-	 * drain out.
-	 */
 
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
-		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
@@ -1313,7 +1291,7 @@ static int netvsc_vf_down(struct net_dev
 	/*
 	 * Notify peers.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	dev_hold(ndev);
 	net_device_ctx->gwrk.netdev = ndev;
 	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
@@ -1342,7 +1320,7 @@ static int netvsc_unregister_vf(struct n
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	RCU_INIT_POINTER(netvsc_dev->vf_netdev, NULL);
 	dev_put(vf_netdev);
 	return NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC 2/2] netvsc: use RCU for VF net device reference
@ 2016-08-13 18:38       ` Stephen Hemminger
  0 siblings, 0 replies; 31+ messages in thread
From: Stephen Hemminger @ 2016-08-13 18:38 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: netdev, Haiyang Zhang, linux-kernel, devel


Rather than keeping a pointer, a flag, and reference count, use RCU and existing
device reference count to protect the synthetic to VF relationship.

One other change is that injected packets must be accounted for on the synthetic
device otherwise the statistics will be lost. The VF device driver (for most devices)
creates the statistics based on device registers and therefore would ignore any direct
manipulation of network device stats.

Also, rx_dropped is not atomic_long.

Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>


--- a/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.736085464 -0700
@@ -689,6 +689,9 @@ struct netvsc_device {
 	wait_queue_head_t wait_drain;
 	bool destroy;
 
+	/* State to manage the associated VF interface. */
+	struct net_device *vf_netdev __rcu;
+
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
 	u32 recv_buf_size;
@@ -739,10 +742,6 @@ struct netvsc_device {
 	/* Serial number of the VF to team with */
 	u32 vf_serial;
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
--- a/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.736085464 -0700
@@ -77,13 +77,10 @@ static struct netvsc_device *alloc_net_d
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
+
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
-
 	return net_device;
 }
 
--- a/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:59.764085593 -0700
+++ b/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:31:47.733685146 -0700
@@ -668,59 +668,45 @@ int netvsc_recv_callback(struct hv_devic
 {
 	struct net_device *net = hv_get_drvdata(device_obj);
 	struct net_device_context *net_device_ctx = netdev_priv(net);
-	struct sk_buff *skb;
-	struct sk_buff *vf_skb;
-	struct netvsc_stats *rx_stats;
+	struct netvsc_stats *rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
 	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
-	u32 bytes_recvd = packet->total_data_buflen;
-	int ret = 0;
+	struct net_device *vf_netdev;
+	struct sk_buff *skb;
 
 	if (!net || net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
-			/*
-			 * We raced; just move on.
-			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
-			goto vf_injection_done;
-		}
+	vf_netdev = rcu_dereference(netvsc_dev->vf_netdev);
+	if (vf_netdev) {
+		/* Inject this packet into the VF interface.  On
+		 * Hyper-V, multicast and broadcast packets are only
+		 * delivered on the synthetic interface (after
+		 * subjecting these to policy filters on the
+		 * host). Deliver these via the VF interface in the
+		 * guest if up, otherwise drop.
+		 */
+		if (!netif_running(vf_netdev))
+			goto drop;
 
-		/*
-		 * Inject this packet into the VF inerface.
-		 * On Hyper-V, multicast and brodcast packets
-		 * are only delivered on the synthetic interface
-		 * (after subjecting these to policy filters on
-		 * the host). Deliver these via the VF interface
-		 * in the guest.
+		/* Account for this on the synthetic interface
+		 * otherwise likely to be not accounted for since
+		 * device statistics on the VF are driver dependent.
 		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
-		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
-			netif_receive_skb(vf_skb);
-		} else {
-			++net->stats.rx_dropped;
-			ret = NVSP_STAT_FAIL;
-		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
-		return ret;
+		++net->stats.multicast;
+		net = vf_netdev;
 	}
 
-vf_injection_done:
-	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
-
 	/* Allocate a skb - TODO direct I/O to pages? */
 	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
 	if (unlikely(!skb)) {
+drop:
 		++net->stats.rx_dropped;
 		return NVSP_STAT_FAIL;
 	}
-	skb_record_rx_queue(skb, channel->
-			    offermsg.offer.sub_channel_index);
+
+	if (likely(!vf_netdev))
+		skb_record_rx_queue(skb,
+				    channel->offermsg.offer.sub_channel_index);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
@@ -989,6 +975,7 @@ static struct rtnl_link_stats64 *netvsc_
 
 	t->rx_dropped	= net->stats.rx_dropped;
 	t->rx_errors	= net->stats.rx_errors;
+	t->multicast 	= net->stats.multicast;
 
 	return t;
 }
@@ -1170,8 +1157,7 @@ static void netvsc_notify_peers(struct w
 	gwrk = container_of(wrk, struct garp_wrk, dwrk);
 
 	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
+	dev_put(gwrk->netdev);
 }
 
 static struct net_device *get_netvsc_net_device(char *mac)
@@ -1222,7 +1208,7 @@ static int netvsc_register_vf(struct net
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
 
 	dev_hold(vf_netdev);
-	netvsc_dev->vf_netdev = vf_netdev;
+	rcu_assign_pointer(netvsc_dev->vf_netdev, vf_netdev);
 	return NOTIFY_OK;
 }
 
@@ -1248,7 +1234,6 @@ static int netvsc_vf_up(struct net_devic
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1268,7 +1253,7 @@ static int netvsc_vf_up(struct net_devic
 	 * notify peers; take a reference to prevent
 	 * the VF interface from vanishing.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	dev_hold(vf_netdev);
 	net_device_ctx->gwrk.netdev = vf_netdev;
 	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
@@ -1298,14 +1283,7 @@ static int netvsc_vf_down(struct net_dev
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
-	/*
-	 * Wait for currently active users to
-	 * drain out.
-	 */
 
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
-		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
@@ -1313,7 +1291,7 @@ static int netvsc_vf_down(struct net_dev
 	/*
 	 * Notify peers.
 	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
+	dev_hold(ndev);
 	net_device_ctx->gwrk.netdev = ndev;
 	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
 	schedule_work(&net_device_ctx->gwrk.dwrk);
@@ -1342,7 +1320,7 @@ static int netvsc_unregister_vf(struct n
 		return NOTIFY_DONE;
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	RCU_INIT_POINTER(netvsc_dev->vf_netdev, NULL);
 	dev_put(vf_netdev);
 	return NOTIFY_OK;
 }

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC 1/2] netvsc: reference counting fix
  2016-08-13 18:35     ` Stephen Hemminger
@ 2016-08-15  4:31       ` David Miller
  -1 siblings, 0 replies; 31+ messages in thread
From: David Miller @ 2016-08-15  4:31 UTC (permalink / raw)
  To: stephen; +Cc: vkuznets, netdev, devel, linux-kernel, haiyangz, kys

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 13 Aug 2016 11:35:59 -0700

> This is how I think it should be fixed, but not tested yet.
> 
> Subjec: netvsc: use device not module reference counts
> 
> Fix how the cross-device reference counting is handled.  When VF is
> associated with the synthetic interface, the VF driver module should
> still be able to be unloaded.  The module unload code will callback
> with NETDEV_UNREGISTER event which breaks the connection safely.
> (Fixes 9f4b5ba5db4 hv_netvsc: Implement support for VF drivers on Hyper-V)
> 
> Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>

This might not work.

It is assumed that when a netdev unregister happens, it may be done so
at any point in time.

Therefore that NETDEV_UNREGISTER event must eliminate any and all
references to a given netdev.  And that works perfectly fine right now
with all existing subsystems that take references to netdevs.

You'll have to add something so that a NETDEV_UNREGISTER event tears
this VF down and thus releases it's reference to the synthetic device.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC 1/2] netvsc: reference counting fix
@ 2016-08-15  4:31       ` David Miller
  0 siblings, 0 replies; 31+ messages in thread
From: David Miller @ 2016-08-15  4:31 UTC (permalink / raw)
  To: stephen; +Cc: netdev, haiyangz, linux-kernel, devel

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 13 Aug 2016 11:35:59 -0700

> This is how I think it should be fixed, but not tested yet.
> 
> Subjec: netvsc: use device not module reference counts
> 
> Fix how the cross-device reference counting is handled.  When VF is
> associated with the synthetic interface, the VF driver module should
> still be able to be unloaded.  The module unload code will callback
> with NETDEV_UNREGISTER event which breaks the connection safely.
> (Fixes 9f4b5ba5db4 hv_netvsc: Implement support for VF drivers on Hyper-V)
> 
> Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>

This might not work.

It is assumed that when a netdev unregister happens, it may be done so
at any point in time.

Therefore that NETDEV_UNREGISTER event must eliminate any and all
references to a given netdev.  And that works perfectly fine right now
with all existing subsystems that take references to netdevs.

You'll have to add something so that a NETDEV_UNREGISTER event tears
this VF down and thus releases it's reference to the synthetic device.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC 2/2] netvsc: use RCU for VF net device reference
  2016-08-13 18:38       ` Stephen Hemminger
  (?)
@ 2016-08-15 10:06       ` Vitaly Kuznetsov
  -1 siblings, 0 replies; 31+ messages in thread
From: Vitaly Kuznetsov @ 2016-08-15 10:06 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev, devel, linux-kernel, Haiyang Zhang, K. Y. Srinivasan

Stephen Hemminger <stephen@networkplumber.org> writes:

> Rather than keeping a pointer, a flag, and reference count, use RCU and existing
> device reference count to protect the synthetic to VF relationship.

Thanks! I like the idea. Some nitpicks below ...

>
> One other change is that injected packets must be accounted for on the synthetic
> device otherwise the statistics will be lost. The VF device driver (for most devices)
> creates the statistics based on device registers and therefore would ignore any direct
> manipulation of network device stats.
>
> Also, rx_dropped is not atomic_long.
>
> Signed-off-by: Stephen Hemminger <sthemmin@linuxonhyperv.com>
>
> --- a/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.764085593 -0700
> +++ b/drivers/net/hyperv/hyperv_net.h	2016-08-13 11:25:59.736085464 -0700
> @@ -689,6 +689,9 @@ struct netvsc_device {
>  	wait_queue_head_t wait_drain;
>  	bool destroy;
>
> +	/* State to manage the associated VF interface. */
> +	struct net_device *vf_netdev __rcu;
> +
>  	/* Receive buffer allocated by us but manages by NetVSP */
>  	void *recv_buf;
>  	u32 recv_buf_size;
> @@ -739,10 +742,6 @@ struct netvsc_device {
>  	/* Serial number of the VF to team with */
>  	u32 vf_serial;
>  	atomic_t open_cnt;
> -	/* State to manage the associated VF interface. */
> -	bool vf_inject;
> -	struct net_device *vf_netdev;
> -	atomic_t vf_use_cnt;
>  };
>
>  static inline struct netvsc_device *
> --- a/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.764085593 -0700
> +++ b/drivers/net/hyperv/netvsc.c	2016-08-13 11:25:59.736085464 -0700
> @@ -77,13 +77,10 @@ static struct netvsc_device *alloc_net_d
>  	init_waitqueue_head(&net_device->wait_drain);
>  	net_device->destroy = false;
>  	atomic_set(&net_device->open_cnt, 0);
> -	atomic_set(&net_device->vf_use_cnt, 0);
> +
>  	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
>  	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
>
> -	net_device->vf_netdev = NULL;
> -	net_device->vf_inject = false;
> -
>  	return net_device;
>  }
>
> --- a/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:25:59.764085593 -0700
> +++ b/drivers/net/hyperv/netvsc_drv.c	2016-08-13 11:31:47.733685146 -0700
> @@ -668,59 +668,45 @@ int netvsc_recv_callback(struct hv_devic
>  {
>  	struct net_device *net = hv_get_drvdata(device_obj);
>  	struct net_device_context *net_device_ctx = netdev_priv(net);
> -	struct sk_buff *skb;
> -	struct sk_buff *vf_skb;
> -	struct netvsc_stats *rx_stats;
> +	struct netvsc_stats *rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
>  	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
> -	u32 bytes_recvd = packet->total_data_buflen;
> -	int ret = 0;
> +	struct net_device *vf_netdev;
> +	struct sk_buff *skb;
>
>  	if (!net || net->reg_state != NETREG_REGISTERED)
>  		return NVSP_STAT_FAIL;
>
> -	if (READ_ONCE(netvsc_dev->vf_inject)) {
> -		atomic_inc(&netvsc_dev->vf_use_cnt);
> -		if (!READ_ONCE(netvsc_dev->vf_inject)) {
> -			/*
> -			 * We raced; just move on.
> -			 */
> -			atomic_dec(&netvsc_dev->vf_use_cnt);
> -			goto vf_injection_done;
> -		}
> +	vf_netdev = rcu_dereference(netvsc_dev->vf_netdev);
> +	if (vf_netdev) {
> +		/* Inject this packet into the VF interface.  On
> +		 * Hyper-V, multicast and broadcast packets are only
> +		 * delivered on the synthetic interface (after
> +		 * subjecting these to policy filters on the
> +		 * host). Deliver these via the VF interface in the
> +		 * guest if up, otherwise drop.
> +		 */
> +		if (!netif_running(vf_netdev))
> +			goto drop;

Why drop? In case VF is not running I guess it would be better to
receive the packet through netvsc interface.

>
> -		/*
> -		 * Inject this packet into the VF inerface.
> -		 * On Hyper-V, multicast and brodcast packets
> -		 * are only delivered on the synthetic interface
> -		 * (after subjecting these to policy filters on
> -		 * the host). Deliver these via the VF interface
> -		 * in the guest.
> +		/* Account for this on the synthetic interface
> +		 * otherwise likely to be not accounted for since
> +		 * device statistics on the VF are driver dependent.
>  		 */
> -		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
> -					       csum_info, *data, vlan_tci);
> -		if (vf_skb != NULL) {
> -			++netvsc_dev->vf_netdev->stats.rx_packets;
> -			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
> -			netif_receive_skb(vf_skb);
> -		} else {
> -			++net->stats.rx_dropped;
> -			ret = NVSP_STAT_FAIL;
> -		}
> -		atomic_dec(&netvsc_dev->vf_use_cnt);
> -		return ret;
> +		++net->stats.multicast;

And if the packet is broadcast and not multicast?

> +		net = vf_netdev;
>  	}
>
> -vf_injection_done:
> -	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
> -
>  	/* Allocate a skb - TODO direct I/O to pages? */
>  	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
>  	if (unlikely(!skb)) {
> +drop:
>  		++net->stats.rx_dropped;
>  		return NVSP_STAT_FAIL;
>  	}
> -	skb_record_rx_queue(skb, channel->
> -			    offermsg.offer.sub_channel_index);
> +
> +	if (likely(!vf_netdev))
> +		skb_record_rx_queue(skb,
> +				    channel->offermsg.offer.sub_channel_index);
>
>  	u64_stats_update_begin(&rx_stats->syncp);
>  	rx_stats->packets++;
> @@ -989,6 +975,7 @@ static struct rtnl_link_stats64 *netvsc_
>
>  	t->rx_dropped	= net->stats.rx_dropped;
>  	t->rx_errors	= net->stats.rx_errors;
> +	t->multicast 	= net->stats.multicast;
>
>  	return t;
>  }
> @@ -1170,8 +1157,7 @@ static void netvsc_notify_peers(struct w
>  	gwrk = container_of(wrk, struct garp_wrk, dwrk);
>
>  	netdev_notify_peers(gwrk->netdev);
> -
> -	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
> +	dev_put(gwrk->netdev);
>  }
>
>  static struct net_device *get_netvsc_net_device(char *mac)
> @@ -1222,7 +1208,7 @@ static int netvsc_register_vf(struct net
>  	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
>
>  	dev_hold(vf_netdev);
> -	netvsc_dev->vf_netdev = vf_netdev;
> +	rcu_assign_pointer(netvsc_dev->vf_netdev, vf_netdev);
>  	return NOTIFY_OK;
>  }
>
> @@ -1248,7 +1234,6 @@ static int netvsc_vf_up(struct net_devic
>  		return NOTIFY_DONE;
>
>  	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
> -	netvsc_dev->vf_inject = true;
>
>  	/*
>  	 * Open the device before switching data path.
> @@ -1268,7 +1253,7 @@ static int netvsc_vf_up(struct net_devic
>  	 * notify peers; take a reference to prevent
>  	 * the VF interface from vanishing.
>  	 */
> -	atomic_inc(&netvsc_dev->vf_use_cnt);
> +	dev_hold(vf_netdev);

PATCH net 4/4 of my series drops gwrk.dwrk completely so depending on
the patch order this may not be needed...

>  	net_device_ctx->gwrk.netdev = vf_netdev;
>  	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
>  	schedule_work(&net_device_ctx->gwrk.dwrk);
> @@ -1298,14 +1283,7 @@ static int netvsc_vf_down(struct net_dev
>  		return NOTIFY_DONE;
>
>  	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
> -	netvsc_dev->vf_inject = false;
> -	/*
> -	 * Wait for currently active users to
> -	 * drain out.
> -	 */
>
> -	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
> -		udelay(50);
>  	netvsc_switch_datapath(ndev, false);
>  	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
>  	rndis_filter_close(netvsc_dev);
> @@ -1313,7 +1291,7 @@ static int netvsc_vf_down(struct net_dev
>  	/*
>  	 * Notify peers.
>  	 */
> -	atomic_inc(&netvsc_dev->vf_use_cnt);
> +	dev_hold(ndev);
>  	net_device_ctx->gwrk.netdev = ndev;
>  	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
>  	schedule_work(&net_device_ctx->gwrk.dwrk);
> @@ -1342,7 +1320,7 @@ static int netvsc_unregister_vf(struct n
>  		return NOTIFY_DONE;
>  	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
>
> -	netvsc_dev->vf_netdev = NULL;
> +	RCU_INIT_POINTER(netvsc_dev->vf_netdev, NULL);
>  	dev_put(vf_netdev);
>  	return NOTIFY_OK;
>  }

I'd also suggest you split your patch into two - switch to using RCU and
stats changes as these changes are more or less independent.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2016-08-15 10:06 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-11 10:58 [PATCH net 0/4] hv_netvsc: fixes for VF removal path Vitaly Kuznetsov
2016-08-11 10:58 ` Vitaly Kuznetsov
2016-08-11 10:58 ` [PATCH net 1/4] hv_netvsc: don't lose VF information Vitaly Kuznetsov
2016-08-11 10:58   ` Vitaly Kuznetsov
2016-08-11 15:38   ` Haiyang Zhang
2016-08-11 15:38     ` Haiyang Zhang
2016-08-11 10:58 ` [PATCH net 2/4] hv_netvsc: reset vf_inject on VF removal Vitaly Kuznetsov
2016-08-11 11:46   ` Yuval Mintz
2016-08-11 12:09     ` Vitaly Kuznetsov
2016-08-11 12:09       ` Vitaly Kuznetsov
2016-08-12 14:47       ` Stephen Hemminger
2016-08-12 14:47         ` Stephen Hemminger
2016-08-11 15:38   ` Haiyang Zhang
2016-08-11 15:38     ` Haiyang Zhang
2016-08-13  3:40   ` David Miller
2016-08-13  3:40     ` David Miller
2016-08-13 18:35   ` [RFC 1/2] netvsc: reference counting fix Stephen Hemminger
2016-08-13 18:35     ` Stephen Hemminger
2016-08-13 18:38     ` [RFC 2/2] netvsc: use RCU for VF net device reference Stephen Hemminger
2016-08-13 18:38       ` Stephen Hemminger
2016-08-15 10:06       ` Vitaly Kuznetsov
2016-08-15  4:31     ` [RFC 1/2] netvsc: reference counting fix David Miller
2016-08-15  4:31       ` David Miller
2016-08-11 10:58 ` [PATCH net 3/4] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev Vitaly Kuznetsov
2016-08-11 10:58   ` Vitaly Kuznetsov
2016-08-11 15:38   ` Haiyang Zhang
2016-08-11 15:38     ` Haiyang Zhang
2016-08-11 10:58 ` [PATCH net 4/4] hv_netvsc: avoid deadlocks between rtnl lock and netvsc_inject_disable() Vitaly Kuznetsov
2016-08-11 10:58   ` Vitaly Kuznetsov
2016-08-11 15:38   ` Haiyang Zhang
2016-08-11 15:38     ` Haiyang Zhang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.