linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes
@ 2017-11-02 10:35 Vitaly Kuznetsov
  2017-11-02 10:35 ` [PATCH net-next v2 1/2] hv_netvsc: netvsc_teardown_gpadl() split Vitaly Kuznetsov
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-02 10:35 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, devel, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Eric Dumazet

It was found that netvsc driver doesn't survive e.g.

# while true; do ethtool -L eth0 combined 4; ethtool -L eth0 combined 8; done"

test. I was able to identify a hang in guest/host communication, it is
fixed by PATCH1 of this series. PATCH2 is a cosmetic change masking
unneeded messages.

Changes since v1:
- Throw away patches 2 and 3 of the original series as one is unneeded and
  the other is not justified [Eric Dumazet, Stephen Hemminger] so I'm only
  fixing the hang now, the crash doesn't reproduce. Will keep an eye on it.

Vitaly Kuznetsov (2):
  hv_netvsc: netvsc_teardown_gpadl() split
  hv_netvsc: hide warnings about uninitialized/missing rndis device

 drivers/net/hyperv/netvsc.c       | 69 ++++++++++++++++++++-------------------
 drivers/net/hyperv/rndis_filter.c |  4 +--
 2 files changed, 38 insertions(+), 35 deletions(-)

-- 
2.13.6

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next v2 1/2] hv_netvsc: netvsc_teardown_gpadl() split
  2017-11-02 10:35 [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes Vitaly Kuznetsov
@ 2017-11-02 10:35 ` Vitaly Kuznetsov
  2017-11-02 10:35 ` [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device Vitaly Kuznetsov
  2017-11-08  1:31 ` [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes David Miller
  2 siblings, 0 replies; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-02 10:35 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, devel, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Eric Dumazet

It was found that in some cases host refuses to teardown GPADL for send/
receive buffers (probably when some work with these buffere is scheduled or
ongoing). Change the teardown logic to be:
1) Send NVSP_MSG1_TYPE_REVOKE_* messages
2) Close the channel
3) Teardown GPADLs.
This seems to work reliably.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/netvsc.c | 69 +++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 33 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 5bb6a20072dd..bfc79698b8f4 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
 	call_rcu(&nvdev->rcu, free_netvsc_device);
 }
 
-static void netvsc_destroy_buf(struct hv_device *device)
+static void netvsc_revoke_buf(struct hv_device *device,
+			      struct netvsc_device *net_device)
 {
 	struct nvsp_message *revoke_packet;
 	struct net_device *ndev = hv_get_drvdata(device);
-	struct net_device_context *ndc = netdev_priv(ndev);
-	struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
 	int ret;
 
 	/*
@@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device)
 		net_device->recv_section_cnt = 0;
 	}
 
-	/* Teardown the gpadl on the vsp end */
-	if (net_device->recv_buf_gpadl_handle) {
-		ret = vmbus_teardown_gpadl(device->channel,
-					   net_device->recv_buf_gpadl_handle);
-
-		/* If we failed here, we might as well return and have a leak
-		 * rather than continue and a bugchk
-		 */
-		if (ret != 0) {
-			netdev_err(ndev,
-				   "unable to teardown receive buffer's gpadl\n");
-			return;
-		}
-		net_device->recv_buf_gpadl_handle = 0;
-	}
-
-	if (net_device->recv_buf) {
-		/* Free up the receive buffer */
-		vfree(net_device->recv_buf);
-		net_device->recv_buf = NULL;
-	}
-
 	/* Deal with the send buffer we may have setup.
 	 * If we got a  send section size, it means we received a
 	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
@@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device)
 		}
 		net_device->send_section_cnt = 0;
 	}
-	/* Teardown the gpadl on the vsp end */
+}
+
+static void netvsc_teardown_gpadl(struct hv_device *device,
+				  struct netvsc_device *net_device)
+{
+	struct net_device *ndev = hv_get_drvdata(device);
+	int ret;
+
+	if (net_device->recv_buf_gpadl_handle) {
+		ret = vmbus_teardown_gpadl(device->channel,
+					   net_device->recv_buf_gpadl_handle);
+
+		/* If we failed here, we might as well return and have a leak
+		 * rather than continue and a bugchk
+		 */
+		if (ret != 0) {
+			netdev_err(ndev,
+				   "unable to teardown receive buffer's gpadl\n");
+			return;
+		}
+		net_device->recv_buf_gpadl_handle = 0;
+	}
+
+	if (net_device->recv_buf) {
+		/* Free up the receive buffer */
+		vfree(net_device->recv_buf);
+		net_device->recv_buf = NULL;
+	}
+
 	if (net_device->send_buf_gpadl_handle) {
 		ret = vmbus_teardown_gpadl(device->channel,
 					   net_device->send_buf_gpadl_handle);
@@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device,
 	goto exit;
 
 cleanup:
-	netvsc_destroy_buf(device);
+	netvsc_revoke_buf(device, net_device);
+	netvsc_teardown_gpadl(device, net_device);
 
 exit:
 	return ret;
@@ -539,11 +545,6 @@ static int netvsc_connect_vsp(struct hv_device *device,
 	return ret;
 }
 
-static void netvsc_disconnect_vsp(struct hv_device *device)
-{
-	netvsc_destroy_buf(device);
-}
-
 /*
  * netvsc_device_remove - Callback when the root bus device is removed
  */
@@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device)
 
 	cancel_work_sync(&net_device->subchan_work);
 
-	netvsc_disconnect_vsp(device);
+	netvsc_revoke_buf(device, net_device);
 
 	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
@@ -570,6 +571,8 @@ void netvsc_device_remove(struct hv_device *device)
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
 
+	netvsc_teardown_gpadl(device, net_device);
+
 	/* And dissassociate NAPI context from device */
 	for (i = 0; i < net_device->num_chn; i++)
 		netif_napi_del(&net_device->chan_table[i].napi);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device
  2017-11-02 10:35 [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes Vitaly Kuznetsov
  2017-11-02 10:35 ` [PATCH net-next v2 1/2] hv_netvsc: netvsc_teardown_gpadl() split Vitaly Kuznetsov
@ 2017-11-02 10:35 ` Vitaly Kuznetsov
       [not found]   ` <CAOaVG14eqAq8irLbgR9b-JA--aiTov=VWZpTi2maKQ=nbo_ByQ@mail.gmail.com>
  2017-11-08  1:31 ` [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes David Miller
  2 siblings, 1 reply; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-02 10:35 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, devel, K. Y. Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Eric Dumazet

Hyper-V hosts are known to send RNDIS messages even after we halt the
device in rndis_filter_halt_device(). Remove user visible messages
as they are not really useful.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 drivers/net/hyperv/rndis_filter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 0648eebda829..8b1242b8d8ef 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev,
 
 	/* Make sure the rndis device state is initialized */
 	if (unlikely(!rndis_dev)) {
-		netif_err(net_device_ctx, rx_err, ndev,
+		netif_dbg(net_device_ctx, rx_err, ndev,
 			  "got rndis message but no rndis device!\n");
 		return NVSP_STAT_FAIL;
 	}
 
 	if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
-		netif_err(net_device_ctx, rx_err, ndev,
+		netif_dbg(net_device_ctx, rx_err, ndev,
 			  "got rndis message uninitialized\n");
 		return NVSP_STAT_FAIL;
 	}
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes
  2017-11-02 10:35 [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes Vitaly Kuznetsov
  2017-11-02 10:35 ` [PATCH net-next v2 1/2] hv_netvsc: netvsc_teardown_gpadl() split Vitaly Kuznetsov
  2017-11-02 10:35 ` [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device Vitaly Kuznetsov
@ 2017-11-08  1:31 ` David Miller
  2017-11-10  3:53   ` [RFC] hv_netvsc: safer orderly shutdown Stephen Hemminger
  2 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2017-11-08  1:31 UTC (permalink / raw)
  To: vkuznets
  Cc: netdev, linux-kernel, devel, kys, haiyangz, sthemmin, eric.dumazet

From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu,  2 Nov 2017 11:35:29 +0100

> It was found that netvsc driver doesn't survive e.g.
> 
> # while true; do ethtool -L eth0 combined 4; ethtool -L eth0 combined 8; done"
> 
> test. I was able to identify a hang in guest/host communication, it is
> fixed by PATCH1 of this series. PATCH2 is a cosmetic change masking
> unneeded messages.
> 
> Changes since v1:
> - Throw away patches 2 and 3 of the original series as one is unneeded and
>   the other is not justified [Eric Dumazet, Stephen Hemminger] so I'm only
>   fixing the hang now, the crash doesn't reproduce. Will keep an eye on it.

Series applied to net-next, thank you.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device
       [not found]       ` <CAOaVG17ctoE53UkBG4O1=962K4qOzAjteEeG6uiNDgCUGq4jYQ@mail.gmail.com>
@ 2017-11-08  9:36         ` Vitaly Kuznetsov
  0 siblings, 0 replies; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-08  9:36 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev, Stephen Hemminger, Eric Dumazet, Haiyang Zhang,
	linux-kernel, devel

Stephen Hemminger <stephen@networkplumber.org> writes:

> On Nov 2, 2017 19:35, "Vitaly Kuznetsov" <vkuznets@redhat.com> wrote:
>
>  Hyper-V hosts are known to send RNDIS messages even after we halt the
>  device in rndis_filter_halt_device(). Remove user visible messages
>  as they are not really useful.
>
>  Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>  ---
>  drivers/net/hyperv/rndis_filter.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
>  diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
>  index 0648eebda829..8b1242b8d8ef 100644
>  --- a/drivers/net/hyperv/rndis_filter.c
>  +++ b/drivers/net/hyperv/rndis_filter.c
>  @@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev,
>
>  /* Make sure the rndis device state is initialized */
>  if (unlikely(!rndis_dev)) {
>  - netif_err(net_device_ctx, rx_err, ndev,
>  + netif_dbg(net_device_ctx, rx_err, ndev,
>  "got rndis message but no rndis device!\n");
>  return NVSP_STAT_FAIL;
>  }
>
>  if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
>  - netif_err(net_device_ctx, rx_err, ndev,
>  + netif_dbg(net_device_ctx, rx_err, ndev,
>  "got rndis message uninitialized\n");
>  return NVSP_STAT_FAIL;
>  }
>  --
>  2.13.6
>
>  _______________________________________________
>  devel mailing list
>  devel@linuxdriverproject.org
>  http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
>
> This should never happen since host removal should be setting device
> down which disables NAPI. If this is not working correctly, that needs
> to be fixed (rather than silencing the message).

This happens in between we halt RNDIS device in
rndis_filter_halt_device() and NAPI shutdown from netvsc_device_remove()
while the window is still open.

>
> Don't shoot the messenger

These messages are of no use for a random user: you change MTU on your
device and see 'got rndis message uninitialized' - what are you supposed
to do? I leave them at debugging level for us to actually debug.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [RFC] hv_netvsc: safer orderly shutdown
  2017-11-08  1:31 ` [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes David Miller
@ 2017-11-10  3:53   ` Stephen Hemminger
  2017-11-10 10:42     ` Vitaly Kuznetsov
  0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2017-11-10  3:53 UTC (permalink / raw)
  To: David Miller
  Cc: vkuznets, sthemmin, eric.dumazet, netdev, haiyangz, linux-kernel, devel


Several types of control operations require that the underlying RNDIS
infrastructure be restarted. This patch changes the ordering of the
shutdown to avoid race conditions.
Stop all transmits before doing RNDIS halt. This involves stopping the
network device transmit queues, then waiting for all outstanding
sends before informing host to halt.

Also, check for successful restart of the device when after the
change is done.

For review, not tested on Hyper-V yet.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c   | 40 ++++++++++++++++++++++++++++++---------
 drivers/net/hyperv/rndis_filter.c | 23 +++++++++++-----------
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index da216ca4f2b2..3afa082e093d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -855,8 +855,10 @@ static int netvsc_set_channels(struct net_device *net,
 
 	orig = nvdev->num_chn;
 	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
+	if (was_opened) {
+		netif_tx_disable(net);
 		rndis_filter_close(nvdev);
+	}
 
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.num_chn = count;
@@ -881,8 +883,13 @@ static int netvsc_set_channels(struct net_device *net,
 		}
 	}
 
-	if (was_opened)
-		rndis_filter_open(nvdev);
+	if (was_opened) {
+		ret = rndis_filter_open(nvdev);
+		if (ret)
+			netdev_err(net, "reopening device failed: %d\n", ret);
+		else
+			netif_tx_start_all_queues(net);
+	}
 
 	/* We may have missed link change notifications */
 	net_device_ctx->last_reconfig = 0;
@@ -971,8 +978,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 
 	netif_device_detach(ndev);
 	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
+	if (was_opened) {
+		netif_tx_disable(net);
 		rndis_filter_close(nvdev);
+	}
 
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
@@ -1004,8 +1013,13 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 		}
 	}
 
-	if (was_opened)
-		rndis_filter_open(nvdev);
+	if (was_opened) {
+		ret = rndis_filter_open(nvdev);
+		if (ret)
+			netdev_err(net, "reopening device failed: %d\n", ret);
+		else
+			netif_tx_start_all_queues(net);
+	}
 
 	netif_device_attach(ndev);
 
@@ -1547,8 +1561,10 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 
 	netif_device_detach(ndev);
 	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
+	if (was_opened) {
+		netif_tx_disable(net);
 		rndis_filter_close(nvdev);
+	}
 
 	rndis_filter_device_remove(hdev, nvdev);
 
@@ -1566,8 +1582,14 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 		}
 	}
 
-	if (was_opened)
-		rndis_filter_open(nvdev);
+	if (was_opened) {
+		ret = rndis_filter_open(nvdev);
+		if (ret)
+			netdev_err(net, "reopening device failed: %d\n", ret);
+		else
+			netif_tx_start_all_queues(net);
+	}
+
 	netif_device_attach(ndev);
 
 	/* We may have missed link change notifications */
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 0648eebda829..164f5ffe9c50 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -948,11 +948,20 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
 	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
 	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
+	/* tell bottom half that deice is being closed */
+	nvdev->destroy = true;
+
+	/* Force flag to be ordered before waiting */
+	wmb();
+
+	/* Wait for all send completions */
+	wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
+
 	/* Attempt to do a rndis device halt */
 	request = get_rndis_request(dev, RNDIS_MSG_HALT,
 				RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
 	if (!request)
-		goto cleanup;
+		return;
 
 	/* Setup the rndis set */
 	halt = &request->request_msg.msg.halt_req;
@@ -963,17 +972,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
 
 	dev->state = RNDIS_DEV_UNINITIALIZED;
 
-cleanup:
-	nvdev->destroy = true;
-
-	/* Force flag to be ordered before waiting */
-	wmb();
-
-	/* Wait for all send completions */
-	wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
-
-	if (request)
-		put_rndis_request(dev, request);
+	put_rndis_request(dev, request);
 }
 
 static int rndis_filter_open_device(struct rndis_device *dev)
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [RFC] hv_netvsc: safer orderly shutdown
  2017-11-10  3:53   ` [RFC] hv_netvsc: safer orderly shutdown Stephen Hemminger
@ 2017-11-10 10:42     ` Vitaly Kuznetsov
       [not found]       ` <CAOaVG15dBsPcpi1buaKyW5oRGmGUczgaO54De6eYXUqp-1z+gw@mail.gmail.com>
  0 siblings, 1 reply; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-10 10:42 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, sthemmin, eric.dumazet, netdev, haiyangz,
	linux-kernel, devel

Stephen Hemminger <stephen@networkplumber.org> writes:

> Several types of control operations require that the underlying RNDIS
> infrastructure be restarted. This patch changes the ordering of the
> shutdown to avoid race conditions.
> Stop all transmits before doing RNDIS halt. This involves stopping the
> network device transmit queues, then waiting for all outstanding
> sends before informing host to halt.
>
> Also, check for successful restart of the device when after the
> change is done.
>
> For review, not tested on Hyper-V yet.
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
>  drivers/net/hyperv/netvsc_drv.c   | 40 ++++++++++++++++++++++++++++++---------
>  drivers/net/hyperv/rndis_filter.c | 23 +++++++++++-----------
>  2 files changed, 42 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index da216ca4f2b2..3afa082e093d 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -855,8 +855,10 @@ static int netvsc_set_channels(struct net_device *net,
>
>  	orig = nvdev->num_chn;
>  	was_opened = rndis_filter_opened(nvdev);
> -	if (was_opened)
> +	if (was_opened) {
> +		netif_tx_disable(net);
>  		rndis_filter_close(nvdev);
> +	}

I was also experimenting with this and I think it may also make sense to
add napi_disable() for all queues here.

It also seems that the usual TX disable pattern is 

    netif_tx_stop_all_queues(net);
    netif_tx_disable(net);

not sure why..

>
>  	memset(&device_info, 0, sizeof(device_info));
>  	device_info.num_chn = count;
> @@ -881,8 +883,13 @@ static int netvsc_set_channels(struct net_device *net,
>  		}
>  	}
>
> -	if (was_opened)
> -		rndis_filter_open(nvdev);
> +	if (was_opened) {
> +		ret = rndis_filter_open(nvdev);
> +		if (ret)
> +			netdev_err(net, "reopening device failed: %d\n", ret);
> +		else
> +			netif_tx_start_all_queues(net);
> +	}
>
>  	/* We may have missed link change notifications */
>  	net_device_ctx->last_reconfig = 0;
> @@ -971,8 +978,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
>
>  	netif_device_detach(ndev);
>  	was_opened = rndis_filter_opened(nvdev);
> -	if (was_opened)
> +	if (was_opened) {
> +		netif_tx_disable(net);
>  		rndis_filter_close(nvdev);
> +	}

Shall we just nove netif_tx_disable() & friends to rndis_filter_close()?

>
>  	memset(&device_info, 0, sizeof(device_info));
>  	device_info.ring_size = ring_size;
> @@ -1004,8 +1013,13 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
>  		}
>  	}
>
> -	if (was_opened)
> -		rndis_filter_open(nvdev);
> +	if (was_opened) {
> +		ret = rndis_filter_open(nvdev);
> +		if (ret)
> +			netdev_err(net, "reopening device failed: %d\n", ret);
> +		else
> +			netif_tx_start_all_queues(net);
> +	}

Yea, the main problem here is that we can't do much if we fail, the
device will be completely unusable. That's not something users exepct
doing MTU change...

>
>  	netif_device_attach(ndev);
>
> @@ -1547,8 +1561,10 @@ static int netvsc_set_ringparam(struct net_device *ndev,
>
>  	netif_device_detach(ndev);
>  	was_opened = rndis_filter_opened(nvdev);
> -	if (was_opened)
> +	if (was_opened) {
> +		netif_tx_disable(net);
>  		rndis_filter_close(nvdev);
> +	}
>
>  	rndis_filter_device_remove(hdev, nvdev);
>
> @@ -1566,8 +1582,14 @@ static int netvsc_set_ringparam(struct net_device *ndev,
>  		}
>  	}
>
> -	if (was_opened)
> -		rndis_filter_open(nvdev);
> +	if (was_opened) {
> +		ret = rndis_filter_open(nvdev);
> +		if (ret)
> +			netdev_err(net, "reopening device failed: %d\n", ret);
> +		else
> +			netif_tx_start_all_queues(net);
> +	}
> +
>  	netif_device_attach(ndev);
>
>  	/* We may have missed link change notifications */
> diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
> index 0648eebda829..164f5ffe9c50 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -948,11 +948,20 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
>  	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
>  	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
>
> +	/* tell bottom half that deice is being closed */
> +	nvdev->destroy = true;
> +
> +	/* Force flag to be ordered before waiting */
> +	wmb();
> +
> +	/* Wait for all send completions */
> +	wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
> +
>  	/* Attempt to do a rndis device halt */
>  	request = get_rndis_request(dev, RNDIS_MSG_HALT,
>  				RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
>  	if (!request)
> -		goto cleanup;
> +		return;
>
>  	/* Setup the rndis set */
>  	halt = &request->request_msg.msg.halt_req;
> @@ -963,17 +972,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
>
>  	dev->state = RNDIS_DEV_UNINITIALIZED;
>
> -cleanup:
> -	nvdev->destroy = true;
> -
> -	/* Force flag to be ordered before waiting */
> -	wmb();
> -
> -	/* Wait for all send completions */
> -	wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
> -
> -	if (request)
> -		put_rndis_request(dev, request);
> +	put_rndis_request(dev, request);
>  }
>
>  static int rndis_filter_open_device(struct rndis_device *dev)

I'll give this patch a shot to see if anything blows up. Thanks!

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] hv_netvsc: safer orderly shutdown
       [not found]       ` <CAOaVG15dBsPcpi1buaKyW5oRGmGUczgaO54De6eYXUqp-1z+gw@mail.gmail.com>
@ 2017-11-13 10:57         ` Vitaly Kuznetsov
  2017-11-13 17:22           ` Stephen Hemminger
  0 siblings, 1 reply; 9+ messages in thread
From: Vitaly Kuznetsov @ 2017-11-13 10:57 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, Stephen Hemminger, eric.dumazet, netdev, haiyangz,
	linux-kernel, devel

Stephen Hemminger <stephen@networkplumber.org> writes:

>
> The NAPI disable is already handled by rndis close.

Sorry, but I'm probably missing something: I can only see
netif_napi_del() call in netvsc_device_remove() but this happens much
later. And I don see us doing napi_disable() anywhere on the path.
But I'm probably missing something.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] hv_netvsc: safer orderly shutdown
  2017-11-13 10:57         ` Vitaly Kuznetsov
@ 2017-11-13 17:22           ` Stephen Hemminger
  0 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2017-11-13 17:22 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: David Miller, Stephen Hemminger, eric.dumazet, netdev, haiyangz,
	linux-kernel, devel

On Mon, 13 Nov 2017 11:57:47 +0100
Vitaly Kuznetsov <vkuznets@redhat.com> wrote:

> Stephen Hemminger <stephen@networkplumber.org> writes:
> 
> >
> > The NAPI disable is already handled by rndis close.  
> 
> Sorry, but I'm probably missing something: I can only see
> netif_napi_del() call in netvsc_device_remove() but this happens much
> later. And I don see us doing napi_disable() anywhere on the path.
> But I'm probably missing something.
> 

You need to keep NAPI running to handle transmit completions.
Disabling the Tx and Rx filter should keep spurious activity
away until the halt is done.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-11-13 17:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-02 10:35 [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes Vitaly Kuznetsov
2017-11-02 10:35 ` [PATCH net-next v2 1/2] hv_netvsc: netvsc_teardown_gpadl() split Vitaly Kuznetsov
2017-11-02 10:35 ` [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device Vitaly Kuznetsov
     [not found]   ` <CAOaVG14eqAq8irLbgR9b-JA--aiTov=VWZpTi2maKQ=nbo_ByQ@mail.gmail.com>
     [not found]     ` <CAOaVG16ekV3rP4dA3GgnAcodPuALXd4W5eTtjNr_Pwtn3JVkxQ@mail.gmail.com>
     [not found]       ` <CAOaVG17ctoE53UkBG4O1=962K4qOzAjteEeG6uiNDgCUGq4jYQ@mail.gmail.com>
2017-11-08  9:36         ` Vitaly Kuznetsov
2017-11-08  1:31 ` [PATCH net-next v2 0/2] hv_netvsc: fix a hang on channel/mtu changes David Miller
2017-11-10  3:53   ` [RFC] hv_netvsc: safer orderly shutdown Stephen Hemminger
2017-11-10 10:42     ` Vitaly Kuznetsov
     [not found]       ` <CAOaVG15dBsPcpi1buaKyW5oRGmGUczgaO54De6eYXUqp-1z+gw@mail.gmail.com>
2017-11-13 10:57         ` Vitaly Kuznetsov
2017-11-13 17:22           ` Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).