All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-23 21:12 ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-23 21:12 UTC (permalink / raw)
  To: 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	Haiyang Zhang, 'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'


Before the guest finishes the device initialization, the device can be
removed anytime by the host, and after that the host won't respond to
the guest's request, so the guest should be prepared to handle this
case.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ad6a64d..248765f 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
 static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
 static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
 
+/*
+ * There is no good way to get notified from vmbus_onoffer_rescind(),
+ * so let's use polling here, since this is not a hot path.
+ */
+static int wait_for_response(struct hv_device *hdev,
+			     struct completion *comp)
+{
+	while (true) {
+		if (hdev->channel->rescind) {
+			dev_warn_once(&hdev->device, "The device is gone.\n");
+			return -ENODEV;
+		}
+
+		if (wait_for_completion_timeout(comp, HZ / 10))
+			break;
+	}
+
+	return 0;
+}
+
 /**
  * devfn_to_wslot() - Convert from Linux PCI slot to Windows
  * @devfn:	The Linux representation of PCI slot
@@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
 	if (ret)
 		goto error;
 
-	wait_for_completion(&comp_pkt.host_event);
+	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
+		goto error;
 
 	hpdev->desc = *desc;
 	refcount_set(&hpdev->refs, 1);
@@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 				sizeof(struct pci_version_request),
 				(unsigned long)pkt, VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
+
 		if (ret) {
 			dev_err(&hdev->device,
-				"PCI Pass-through VSP failed sending version reqquest: %#x",
+				"PCI Pass-through VSP failed to request version: %d",
 				ret);
 			goto exit;
 		}
 
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status >= 0) {
 			pci_protocol_version = pci_protocol_versions[i];
 			dev_info(&hdev->device,
@@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
 			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (!ret)
+		ret = wait_for_response(hdev, &comp_pkt.host_event);
+
 	if (ret)
 		goto exit;
 
-	wait_for_completion(&comp_pkt.host_event);
-
 	if (comp_pkt.completion_status < 0) {
 		dev_err(&hdev->device,
 			"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
 
 	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
 			       0, VM_PKT_DATA_INBAND, 0);
-	if (ret)
-		return ret;
+	if (!ret)
+		ret = wait_for_response(hdev, &comp);
 
-	wait_for_completion(&comp);
-	return 0;
+	return ret;
 }
 
 /**
@@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 				size_res, (unsigned long)pkt,
 				VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret = wait_for_response(hdev, &comp_pkt.host_event);
 		if (ret)
 			break;
 
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status < 0) {
 			ret = -EPROTO;
 			dev_err(&hdev->device,
-- 
2.7.4

_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-23 21:12 ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-23 21:12 UTC (permalink / raw)
  To: 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'


Before the guest finishes the device initialization, the device can be
removed anytime by the host, and after that the host won't respond to
the guest's request, so the guest should be prepared to handle this
case.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-------=
----
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index ad6a64d..248765f 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
 static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
 static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
=20
+/*
+ * There is no good way to get notified from vmbus_onoffer_rescind(),
+ * so let's use polling here, since this is not a hot path.
+ */
+static int wait_for_response(struct hv_device *hdev,
+			     struct completion *comp)
+{
+	while (true) {
+		if (hdev->channel->rescind) {
+			dev_warn_once(&hdev->device, "The device is gone.\n");
+			return -ENODEV;
+		}
+
+		if (wait_for_completion_timeout(comp, HZ / 10))
+			break;
+	}
+
+	return 0;
+}
+
 /**
  * devfn_to_wslot() - Convert from Linux PCI slot to Windows
  * @devfn:	The Linux representation of PCI slot
@@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct =
hv_pcibus_device *hbus,
 	if (ret)
 		goto error;
=20
-	wait_for_completion(&comp_pkt.host_event);
+	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
+		goto error;
=20
 	hpdev->desc =3D *desc;
 	refcount_set(&hpdev->refs, 1);
@@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_de=
vice *hdev)
 				sizeof(struct pci_version_request),
 				(unsigned long)pkt, VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret =3D wait_for_response(hdev, &comp_pkt.host_event);
+
 		if (ret) {
 			dev_err(&hdev->device,
-				"PCI Pass-through VSP failed sending version reqquest: %#x",
+				"PCI Pass-through VSP failed to request version: %d",
 				ret);
 			goto exit;
 		}
=20
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status >=3D 0) {
 			pci_protocol_version =3D pci_protocol_versions[i];
 			dev_info(&hdev->device,
@@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	ret =3D vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
 			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (!ret)
+		ret =3D wait_for_response(hdev, &comp_pkt.host_event);
+
 	if (ret)
 		goto exit;
=20
-	wait_for_completion(&comp_pkt.host_event);
-
 	if (comp_pkt.completion_status < 0) {
 		dev_err(&hdev->device,
 			"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device =
*hdev)
=20
 	ret =3D vmbus_sendpacket(hdev->channel, &message, sizeof(message),
 			       0, VM_PKT_DATA_INBAND, 0);
-	if (ret)
-		return ret;
+	if (!ret)
+		ret =3D wait_for_response(hdev, &comp);
=20
-	wait_for_completion(&comp);
-	return 0;
+	return ret;
 }
=20
 /**
@@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_de=
vice *hdev)
 				size_res, (unsigned long)pkt,
 				VM_PKT_DATA_INBAND,
 				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		if (!ret)
+			ret =3D wait_for_response(hdev, &comp_pkt.host_event);
 		if (ret)
 			break;
=20
-		wait_for_completion(&comp_pkt.host_event);
-
 		if (comp_pkt.completion_status < 0) {
 			ret =3D -EPROTO;
 			dev_err(&hdev->device,
--=20
2.7.4

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-23 21:12 ` Dexuan Cui
@ 2018-05-24 12:41   ` Lorenzo Pieralisi
  -1 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-24 12:41 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'olaf@aepfle.de',
	Stephen Hemminger, 'linux-pci@vger.kernel.org',
	'jasowang@redhat.com',
	'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org',
	'apw@canonical.com',
	'marcelo.cerri@canonical.com', 'Bjorn Helgaas',
	'vkuznets@redhat.com',
	Haiyang Zhang

On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;

This is pretty racy, isn't it ? Also, I reckon you should consider the
timeout return value as an error condition unless I am completely
missing the point of what you are doing.

Lorenzo

> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
> 
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-24 12:41   ` Lorenzo Pieralisi
  0 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-24 12:41 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com',
	'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;

This is pretty racy, isn't it ? Also, I reckon you should consider the
timeout return value as an error condition unless I am completely
missing the point of what you are doing.

Lorenzo

> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-24 12:41   ` Lorenzo Pieralisi
  (?)
@ 2018-05-24 23:55     ` Dexuan Cui
  -1 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-24 23:55 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: 'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com',
	'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Thursday, May 24, 2018 05:41
> On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> >
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> *hv_pcidev,
> >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >
> > +/*
> > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > + * so let's use polling here, since this is not a hot path.
> > + */
> > +static int wait_for_response(struct hv_device *hdev,
> > +			     struct completion *comp)
> > +{
> > +	while (true) {
> > +		if (hdev->channel->rescind) {
> > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > +			return -ENODEV;
> > +		}
> > +
> > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > +			break;
> > +	}
> > +
> > +	return 0;
> 
> This is pretty racy, isn't it ? Also, I reckon you should consider the
> timeout return value as an error condition unless I am completely
> missing the point of what you are doing.
> 
> Lorenzo

Actually, this is not racy: we only exit the loop when 
1) the channel is rescinded 
or
2) the channel is not rescinded, and the event is completed.

wait_for_completion_timeout() returns 0 if timed out: in this case,
we keep spinning in the loop every 0.1 second, testing the 2 conditions.

If the chanel is not rescinded, here we should wait for the event 
forever, as the host is supposed to respond to us quickly, and the
event will be completed accordingly. This is what the current code
does. But, in case the channel is rescinded, we need to exit the loop 
immediately with an error return value: this is the only change
made by the patch.

Ideally, we should not use this ugly "polling" method, and the
rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
wait_for_response(), but as I mentioned, there is no good way
to get notified from vmbus_onoffer_rescind(), so I'm proposing
this "polling" method: it's simple and it can work correctly,
and this is not a hot path.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-24 23:55     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-24 23:55 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: 'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com',
	'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Thursday, May 24, 2018 05:41
> On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> >
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> *hv_pcidev,
> >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >
> > +/*
> > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > + * so let's use polling here, since this is not a hot path.
> > + */
> > +static int wait_for_response(struct hv_device *hdev,
> > +			     struct completion *comp)
> > +{
> > +	while (true) {
> > +		if (hdev->channel->rescind) {
> > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > +			return -ENODEV;
> > +		}
> > +
> > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > +			break;
> > +	}
> > +
> > +	return 0;
>=20
> This is pretty racy, isn't it ? Also, I reckon you should consider the
> timeout return value as an error condition unless I am completely
> missing the point of what you are doing.
>=20
> Lorenzo

Actually, this is not racy: we only exit the loop when=20
1) the channel is rescinded=20
or
2) the channel is not rescinded, and the event is completed.

wait_for_completion_timeout() returns 0 if timed out: in this case,
we keep spinning in the loop every 0.1 second, testing the 2 conditions.

If the chanel is not rescinded, here we should wait for the event=20
forever, as the host is supposed to respond to us quickly, and the
event will be completed accordingly. This is what the current code
does. But, in case the channel is rescinded, we need to exit the loop=20
immediately with an error return value: this is the only change
made by the patch.

Ideally, we should not use this ugly "polling" method, and the
rescind-handler, i.e. vmbus_onoffer_rescind(), should notify=20
wait_for_response(), but as I mentioned, there is no good way
to get notified from vmbus_onoffer_rescind(), so I'm proposing
this "polling" method: it's simple and it can work correctly,
and this is not a hot path.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-24 23:55     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-24 23:55 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: 'olaf@aepfle.de',
	Stephen Hemminger, 'linux-pci@vger.kernel.org',
	'jasowang@redhat.com',
	'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org',
	'apw@canonical.com',
	'marcelo.cerri@canonical.com', 'Bjorn Helgaas',
	'vkuznets@redhat.com',
	Haiyang Zhang

> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Thursday, May 24, 2018 05:41
> On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> >
> > --- a/drivers/pci/host/pci-hyperv.c
> > +++ b/drivers/pci/host/pci-hyperv.c
> > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> *hv_pcidev,
> >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> >
> > +/*
> > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > + * so let's use polling here, since this is not a hot path.
> > + */
> > +static int wait_for_response(struct hv_device *hdev,
> > +			     struct completion *comp)
> > +{
> > +	while (true) {
> > +		if (hdev->channel->rescind) {
> > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > +			return -ENODEV;
> > +		}
> > +
> > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > +			break;
> > +	}
> > +
> > +	return 0;
> 
> This is pretty racy, isn't it ? Also, I reckon you should consider the
> timeout return value as an error condition unless I am completely
> missing the point of what you are doing.
> 
> Lorenzo

Actually, this is not racy: we only exit the loop when 
1) the channel is rescinded 
or
2) the channel is not rescinded, and the event is completed.

wait_for_completion_timeout() returns 0 if timed out: in this case,
we keep spinning in the loop every 0.1 second, testing the 2 conditions.

If the chanel is not rescinded, here we should wait for the event 
forever, as the host is supposed to respond to us quickly, and the
event will be completed accordingly. This is what the current code
does. But, in case the channel is rescinded, we need to exit the loop 
immediately with an error return value: this is the only change
made by the patch.

Ideally, we should not use this ugly "polling" method, and the
rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
wait_for_response(), but as I mentioned, there is no good way
to get notified from vmbus_onoffer_rescind(), so I'm proposing
this "polling" method: it's simple and it can work correctly,
and this is not a hot path.

Thanks,
-- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-24 23:55     ` Dexuan Cui
@ 2018-05-25 10:29       ` Lorenzo Pieralisi
  -1 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-25 10:29 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com',
	'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

On Thu, May 24, 2018 at 11:55:35PM +0000, Dexuan Cui wrote:
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Thursday, May 24, 2018 05:41
> > On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> > >
> > > Before the guest finishes the device initialization, the device can be
> > > removed anytime by the host, and after that the host won't respond to
> > > the guest's request, so the guest should be prepared to handle this
> > > case.
> > >
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> > *hv_pcidev,
> > >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >
> > > +/*
> > > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > > + * so let's use polling here, since this is not a hot path.
> > > + */
> > > +static int wait_for_response(struct hv_device *hdev,
> > > +			     struct completion *comp)
> > > +{
> > > +	while (true) {
> > > +		if (hdev->channel->rescind) {
> > > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > > +			return -ENODEV;
> > > +		}
> > > +
> > > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > 
> > This is pretty racy, isn't it ? Also, I reckon you should consider the
> > timeout return value as an error condition unless I am completely
> > missing the point of what you are doing.
> > 
> > Lorenzo
> 
> Actually, this is not racy: we only exit the loop when 
> 1) the channel is rescinded 
> or
> 2) the channel is not rescinded, and the event is completed.
> 
> wait_for_completion_timeout() returns 0 if timed out: in this case,
> we keep spinning in the loop every 0.1 second, testing the 2 conditions.

Yes sorry, you are right, the exit condition is correct, I am waiting for
maintainers ACK to merge it, I need it as soon as possible if you want
this to make it for v4.18.

Thanks,
Lorenzo

> If the chanel is not rescinded, here we should wait for the event 
> forever, as the host is supposed to respond to us quickly, and the
> event will be completed accordingly. This is what the current code
> does. But, in case the channel is rescinded, we need to exit the loop 
> immediately with an error return value: this is the only change
> made by the patch.
> 
> Ideally, we should not use this ugly "polling" method, and the
> rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
> wait_for_response(), but as I mentioned, there is no good way
> to get notified from vmbus_onoffer_rescind(), so I'm proposing
> this "polling" method: it's simple and it can work correctly,
> and this is not a hot path.
> 
> Thanks,
> -- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-25 10:29       ` Lorenzo Pieralisi
  0 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-25 10:29 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'olaf@aepfle.de',
	Stephen Hemminger, 'linux-pci@vger.kernel.org',
	'jasowang@redhat.com',
	'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org',
	'apw@canonical.com',
	'marcelo.cerri@canonical.com', 'Bjorn Helgaas',
	'vkuznets@redhat.com',
	Haiyang Zhang

On Thu, May 24, 2018 at 11:55:35PM +0000, Dexuan Cui wrote:
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Thursday, May 24, 2018 05:41
> > On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> > >
> > > Before the guest finishes the device initialization, the device can be
> > > removed anytime by the host, and after that the host won't respond to
> > > the guest's request, so the guest should be prepared to handle this
> > > case.
> > >
> > > --- a/drivers/pci/host/pci-hyperv.c
> > > +++ b/drivers/pci/host/pci-hyperv.c
> > > @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev
> > *hv_pcidev,
> > >  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
> > >
> > > +/*
> > > + * There is no good way to get notified from vmbus_onoffer_rescind(),
> > > + * so let's use polling here, since this is not a hot path.
> > > + */
> > > +static int wait_for_response(struct hv_device *hdev,
> > > +			     struct completion *comp)
> > > +{
> > > +	while (true) {
> > > +		if (hdev->channel->rescind) {
> > > +			dev_warn_once(&hdev->device, "The device is gone.\n");
> > > +			return -ENODEV;
> > > +		}
> > > +
> > > +		if (wait_for_completion_timeout(comp, HZ / 10))
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > 
> > This is pretty racy, isn't it ? Also, I reckon you should consider the
> > timeout return value as an error condition unless I am completely
> > missing the point of what you are doing.
> > 
> > Lorenzo
> 
> Actually, this is not racy: we only exit the loop when 
> 1) the channel is rescinded 
> or
> 2) the channel is not rescinded, and the event is completed.
> 
> wait_for_completion_timeout() returns 0 if timed out: in this case,
> we keep spinning in the loop every 0.1 second, testing the 2 conditions.

Yes sorry, you are right, the exit condition is correct, I am waiting for
maintainers ACK to merge it, I need it as soon as possible if you want
this to make it for v4.18.

Thanks,
Lorenzo

> If the chanel is not rescinded, here we should wait for the event 
> forever, as the host is supposed to respond to us quickly, and the
> event will be completed accordingly. This is what the current code
> does. But, in case the channel is rescinded, we need to exit the loop 
> immediately with an error return value: this is the only change
> made by the patch.
> 
> Ideally, we should not use this ugly "polling" method, and the
> rescind-handler, i.e. vmbus_onoffer_rescind(), should notify 
> wait_for_response(), but as I mentioned, there is no good way
> to get notified from vmbus_onoffer_rescind(), so I'm proposing
> this "polling" method: it's simple and it can work correctly,
> and this is not a hot path.
> 
> Thanks,
> -- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-23 21:12 ` Dexuan Cui
  (?)
@ 2018-05-25 11:43   ` Haiyang Zhang
  -1 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2018-05-25 11:43 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'



> -----Original Message-----
> From: Dexuan Cui
> Sent: Wednesday, May 23, 2018 5:12 PM
> To: 'Lorenzo Pieralisi' <lorenzo.pieralisi@arm.com>; 'Bjorn Helgaas'
> <bhelgaas@google.com>; 'linux-pci@vger.kernel.org' <linux-
> pci@vger.kernel.org>; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; 'olaf@aepfle.de' <olaf@aepfle.de>;
> 'apw@canonical.com' <apw@canonical.com>; 'jasowang@redhat.com'
> <jasowang@redhat.com>
> Cc: 'linux-kernel@vger.kernel.org' <linux-kernel@vger.kernel.org>; 'driverdev-
> devel@linuxdriverproject.org' <driverdev-devel@linuxdriverproject.org>;
> Haiyang Zhang <haiyangz@microsoft.com>; 'vkuznets@redhat.com'
> <vkuznets@redhat.com>; 'marcelo.cerri@canonical.com'
> <marcelo.cerri@canonical.com>
> Subject: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
> 
> 
> Before the guest finishes the device initialization, the device can be removed
> anytime by the host, and after that the host won't respond to the guest's
> request, so the guest should be prepared to handle this case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-------
> ----

Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>

Thank you!

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-25 11:43   ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2018-05-25 11:43 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'



> -----Original Message-----
> From: Dexuan Cui
> Sent: Wednesday, May 23, 2018 5:12 PM
> To: 'Lorenzo Pieralisi' <lorenzo.pieralisi@arm.com>; 'Bjorn Helgaas'
> <bhelgaas@google.com>; 'linux-pci@vger.kernel.org' <linux-
> pci@vger.kernel.org>; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; 'olaf@aepfle.de' <olaf@aepfle.de>;
> 'apw@canonical.com' <apw@canonical.com>; 'jasowang@redhat.com'
> <jasowang@redhat.com>
> Cc: 'linux-kernel@vger.kernel.org' <linux-kernel@vger.kernel.org>; 'drive=
rdev-
> devel@linuxdriverproject.org' <driverdev-devel@linuxdriverproject.org>;
> Haiyang Zhang <haiyangz@microsoft.com>; 'vkuznets@redhat.com'
> <vkuznets@redhat.com>; 'marcelo.cerri@canonical.com'
> <marcelo.cerri@canonical.com>
> Subject: [PATCH] PCI: hv: Do not wait forever on a device that has disapp=
eared
>=20
>=20
> Before the guest finishes the device initialization, the device can be re=
moved
> anytime by the host, and after that the host won't respond to the guest's
> request, so the guest should be prepared to handle this case.
>=20
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----=
--
> ----

Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>

Thank you!

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-25 11:43   ` Haiyang Zhang
  0 siblings, 0 replies; 31+ messages in thread
From: Haiyang Zhang @ 2018-05-25 11:43 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'



> -----Original Message-----
> From: Dexuan Cui
> Sent: Wednesday, May 23, 2018 5:12 PM
> To: 'Lorenzo Pieralisi' <lorenzo.pieralisi@arm.com>; 'Bjorn Helgaas'
> <bhelgaas@google.com>; 'linux-pci@vger.kernel.org' <linux-
> pci@vger.kernel.org>; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; 'olaf@aepfle.de' <olaf@aepfle.de>;
> 'apw@canonical.com' <apw@canonical.com>; 'jasowang@redhat.com'
> <jasowang@redhat.com>
> Cc: 'linux-kernel@vger.kernel.org' <linux-kernel@vger.kernel.org>; 'driverdev-
> devel@linuxdriverproject.org' <driverdev-devel@linuxdriverproject.org>;
> Haiyang Zhang <haiyangz@microsoft.com>; 'vkuznets@redhat.com'
> <vkuznets@redhat.com>; 'marcelo.cerri@canonical.com'
> <marcelo.cerri@canonical.com>
> Subject: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
> 
> 
> Before the guest finishes the device initialization, the device can be removed
> anytime by the host, and after that the host won't respond to the guest's
> request, so the guest should be prepared to handle this case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-------
> ----

Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>

Thank you!

_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-23 21:12 ` Dexuan Cui
@ 2018-05-25 13:56   ` Lorenzo Pieralisi
  -1 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-25 13:56 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com',
	'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)

Applied with a minor tweak to the commit log to pci/hv for v4.18.

Thanks,
Lorenzo

> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-25 13:56   ` Lorenzo Pieralisi
  0 siblings, 0 replies; 31+ messages in thread
From: Lorenzo Pieralisi @ 2018-05-25 13:56 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: 'olaf@aepfle.de',
	Stephen Hemminger, 'linux-pci@vger.kernel.org',
	'jasowang@redhat.com',
	'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org',
	'apw@canonical.com',
	'marcelo.cerri@canonical.com', 'Bjorn Helgaas',
	'vkuznets@redhat.com',
	Haiyang Zhang

On Wed, May 23, 2018 at 09:12:01PM +0000, Dexuan Cui wrote:
> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)

Applied with a minor tweak to the commit log to pci/hv for v4.18.

Thanks,
Lorenzo

> diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
> index ad6a64d..248765f 100644
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -556,6 +556,26 @@ static void put_pcichild(struct hv_pci_dev *hv_pcidev,
>  static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
>  
> +/*
> + * There is no good way to get notified from vmbus_onoffer_rescind(),
> + * so let's use polling here, since this is not a hot path.
> + */
> +static int wait_for_response(struct hv_device *hdev,
> +			     struct completion *comp)
> +{
> +	while (true) {
> +		if (hdev->channel->rescind) {
> +			dev_warn_once(&hdev->device, "The device is gone.\n");
> +			return -ENODEV;
> +		}
> +
> +		if (wait_for_completion_timeout(comp, HZ / 10))
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * devfn_to_wslot() - Convert from Linux PCI slot to Windows
>   * @devfn:	The Linux representation of PCI slot
> @@ -1569,7 +1589,8 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
>  	if (ret)
>  		goto error;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> +	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
> +		goto error;
>  
>  	hpdev->desc = *desc;
>  	refcount_set(&hpdev->refs, 1);
> @@ -2070,15 +2091,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
>  				sizeof(struct pci_version_request),
>  				(unsigned long)pkt, VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  		if (ret) {
>  			dev_err(&hdev->device,
> -				"PCI Pass-through VSP failed sending version reqquest: %#x",
> +				"PCI Pass-through VSP failed to request version: %d",
>  				ret);
>  			goto exit;
>  		}
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status >= 0) {
>  			pci_protocol_version = pci_protocol_versions[i];
>  			dev_info(&hdev->device,
> @@ -2287,11 +2309,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
>  	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
>  			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
>  			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp_pkt.host_event);
> +
>  	if (ret)
>  		goto exit;
>  
> -	wait_for_completion(&comp_pkt.host_event);
> -
>  	if (comp_pkt.completion_status < 0) {
>  		dev_err(&hdev->device,
>  			"PCI Pass-through VSP failed D0 Entry with status %x\n",
> @@ -2331,11 +2354,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
>  
>  	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
>  			       0, VM_PKT_DATA_INBAND, 0);
> -	if (ret)
> -		return ret;
> +	if (!ret)
> +		ret = wait_for_response(hdev, &comp);
>  
> -	wait_for_completion(&comp);
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2405,11 +2427,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
>  				size_res, (unsigned long)pkt,
>  				VM_PKT_DATA_INBAND,
>  				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
> +		if (!ret)
> +			ret = wait_for_response(hdev, &comp_pkt.host_event);
>  		if (ret)
>  			break;
>  
> -		wait_for_completion(&comp_pkt.host_event);
> -
>  		if (comp_pkt.completion_status < 0) {
>  			ret = -EPROTO;
>  			dev_err(&hdev->device,
> -- 
> 2.7.4
> 
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-23 21:12 ` Dexuan Cui
  (?)
@ 2018-05-29  0:19   ` Michael Kelley (EOSG)
  -1 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-29  0:19 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 

While this patch solves the immediate problem of getting hung waiting
for a response from Hyper-V that will never come, there's another scenario
to look at that I think introduces a race.  Suppose the guest VM issues a
vmbus_sendpacket() request in one of the cases covered by this patch,
and suppose that Hyper-V queues a response to the request, and then
immediately follows with a rescind request.   Processing the response will
get queued to a tasklet associated with the channel, while processing the
rescind will get queued to a tasklet associated with the top-level vmbus
connection.   From what I can see, the code doesn't impose any ordering
on processing the two.  If the rescind is processed first, the new
wait_for_response() function may wake up, notice the rescind flag, and
return an error.  Its caller will return an error, and in doing so pop the
completion packet off the stack.   When the response is processed later,
it will try to signal completion via a completion packet that no longer
exists, and memory corruption will likely occur.

Am I missing anything that would prevent this scenario from happening?
It is admittedly low probability, and a solution seems non-trivial.  I haven't
looked specifically, but a similar scenario is probably possible with the
drivers for other VMbus devices.  We should work on a generic solution.

Michael

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29  0:19   ` Michael Kelley (EOSG)
  0 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-29  0:19 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

>=20
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
>=20
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----=
------
>  1 file changed, 34 insertions(+), 12 deletions(-)
>=20

While this patch solves the immediate problem of getting hung waiting
for a response from Hyper-V that will never come, there's another scenario
to look at that I think introduces a race.  Suppose the guest VM issues a
vmbus_sendpacket() request in one of the cases covered by this patch,
and suppose that Hyper-V queues a response to the request, and then
immediately follows with a rescind request.   Processing the response will
get queued to a tasklet associated with the channel, while processing the
rescind will get queued to a tasklet associated with the top-level vmbus
connection.   From what I can see, the code doesn't impose any ordering
on processing the two.  If the rescind is processed first, the new
wait_for_response() function may wake up, notice the rescind flag, and
return an error.  Its caller will return an error, and in doing so pop the
completion packet off the stack.   When the response is processed later,
it will try to signal completion via a completion packet that no longer
exists, and memory corruption will likely occur.

Am I missing anything that would prevent this scenario from happening?
It is admittedly low probability, and a solution seems non-trivial.  I have=
n't
looked specifically, but a similar scenario is probably possible with the
drivers for other VMbus devices.  We should work on a generic solution.

Michael

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29  0:19   ` Michael Kelley (EOSG)
  0 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-29  0:19 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	Haiyang Zhang, 'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'

> 
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Cc: Stephen Hemminger <sthemmin@microsoft.com>
> Cc: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/pci/host/pci-hyperv.c | 46 ++++++++++++++++++++++++++++++++-----------
>  1 file changed, 34 insertions(+), 12 deletions(-)
> 

While this patch solves the immediate problem of getting hung waiting
for a response from Hyper-V that will never come, there's another scenario
to look at that I think introduces a race.  Suppose the guest VM issues a
vmbus_sendpacket() request in one of the cases covered by this patch,
and suppose that Hyper-V queues a response to the request, and then
immediately follows with a rescind request.   Processing the response will
get queued to a tasklet associated with the channel, while processing the
rescind will get queued to a tasklet associated with the top-level vmbus
connection.   From what I can see, the code doesn't impose any ordering
on processing the two.  If the rescind is processed first, the new
wait_for_response() function may wake up, notice the rescind flag, and
return an error.  Its caller will return an error, and in doing so pop the
completion packet off the stack.   When the response is processed later,
it will try to signal completion via a completion packet that no longer
exists, and memory corruption will likely occur.

Am I missing anything that would prevent this scenario from happening?
It is admittedly low probability, and a solution seems non-trivial.  I haven't
looked specifically, but a similar scenario is probably possible with the
drivers for other VMbus devices.  We should work on a generic solution.

Michael

_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-29  0:19   ` Michael Kelley (EOSG)
  (?)
@ 2018-05-29 19:58     ` Dexuan Cui
  -1 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 19:58 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Michael Kelley (EOSG)
> Sent: Monday, May 28, 2018 17:19
> 
> While this patch solves the immediate problem of getting hung waiting
> for a response from Hyper-V that will never come, there's another scenario
> to look at that I think introduces a race.  Suppose the guest VM issues a
> vmbus_sendpacket() request in one of the cases covered by this patch,
> and suppose that Hyper-V queues a response to the request, and then
> immediately follows with a rescind request.   Processing the response will
> get queued to a tasklet associated with the channel, while processing the
> rescind will get queued to a tasklet associated with the top-level vmbus
> connection.   From what I can see, the code doesn't impose any ordering
> on processing the two.  If the rescind is processed first, the new
> wait_for_response() function may wake up, notice the rescind flag, and
> return an error.  Its caller will return an error, and in doing so pop the
> completion packet off the stack.   When the response is processed later,
> it will try to signal completion via a completion packet that no longer
> exists, and memory corruption will likely occur.
> 
> Am I missing anything that would prevent this scenario from happening?
> It is admittedly low probability, and a solution seems non-trivial.  I haven't
> looked specifically, but a similar scenario is probably possible with the
> drivers for other VMbus devices.  We should work on a generic solution.
> 
> Michael

Thanks for spotting the race! 

IMO we can disable the per-channel tasklet to exclude the race:

--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
 {
        while (true) {
                if (hdev->channel->rescind) {
+                       tasklet_disable(&hdev->channel->callback_event);
                        dev_warn_once(&hdev->device, "The device is gone.\n");
                        return -ENODEV;
                }

This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can not 
run anymore. What do you think of this?


It looks the list of the other vmbus devices that can be hot-removed is:

the hv_utils devices
hv_sock devices
storvsc device
netvsc device

As I checked, the first 3 types of devices don't have this "send a request to the
host and wait for the response forever" pattern. NetVSC should be fixed as it has
the same pattern.

-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29 19:58     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 19:58 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Michael Kelley (EOSG)
> Sent: Monday, May 28, 2018 17:19
>=20
> While this patch solves the immediate problem of getting hung waiting
> for a response from Hyper-V that will never come, there's another scenari=
o
> to look at that I think introduces a race.  Suppose the guest VM issues a
> vmbus_sendpacket() request in one of the cases covered by this patch,
> and suppose that Hyper-V queues a response to the request, and then
> immediately follows with a rescind request.   Processing the response wil=
l
> get queued to a tasklet associated with the channel, while processing the
> rescind will get queued to a tasklet associated with the top-level vmbus
> connection.   From what I can see, the code doesn't impose any ordering
> on processing the two.  If the rescind is processed first, the new
> wait_for_response() function may wake up, notice the rescind flag, and
> return an error.  Its caller will return an error, and in doing so pop th=
e
> completion packet off the stack.   When the response is processed later,
> it will try to signal completion via a completion packet that no longer
> exists, and memory corruption will likely occur.
>=20
> Am I missing anything that would prevent this scenario from happening?
> It is admittedly low probability, and a solution seems non-trivial.  I ha=
ven't
> looked specifically, but a similar scenario is probably possible with the
> drivers for other VMbus devices.  We should work on a generic solution.
>=20
> Michael

Thanks for spotting the race!=20

IMO we can disable the per-channel tasklet to exclude the race:

--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
 {
        while (true) {
                if (hdev->channel->rescind) {
+                       tasklet_disable(&hdev->channel->callback_event);
                        dev_warn_once(&hdev->device, "The device is gone.\n=
");
                        return -ENODEV;
                }

This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can =
not=20
run anymore. What do you think of this?


It looks the list of the other vmbus devices that can be hot-removed is:

the hv_utils devices
hv_sock devices
storvsc device
netvsc device

As I checked, the first 3 types of devices don't have this "send a request =
to the
host and wait for the response forever" pattern. NetVSC should be fixed as =
it has
the same pattern.

-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29 19:58     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 19:58 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	Haiyang Zhang, 'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'

> From: Michael Kelley (EOSG)
> Sent: Monday, May 28, 2018 17:19
> 
> While this patch solves the immediate problem of getting hung waiting
> for a response from Hyper-V that will never come, there's another scenario
> to look at that I think introduces a race.  Suppose the guest VM issues a
> vmbus_sendpacket() request in one of the cases covered by this patch,
> and suppose that Hyper-V queues a response to the request, and then
> immediately follows with a rescind request.   Processing the response will
> get queued to a tasklet associated with the channel, while processing the
> rescind will get queued to a tasklet associated with the top-level vmbus
> connection.   From what I can see, the code doesn't impose any ordering
> on processing the two.  If the rescind is processed first, the new
> wait_for_response() function may wake up, notice the rescind flag, and
> return an error.  Its caller will return an error, and in doing so pop the
> completion packet off the stack.   When the response is processed later,
> it will try to signal completion via a completion packet that no longer
> exists, and memory corruption will likely occur.
> 
> Am I missing anything that would prevent this scenario from happening?
> It is admittedly low probability, and a solution seems non-trivial.  I haven't
> looked specifically, but a similar scenario is probably possible with the
> drivers for other VMbus devices.  We should work on a generic solution.
> 
> Michael

Thanks for spotting the race! 

IMO we can disable the per-channel tasklet to exclude the race:

--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
 {
        while (true) {
                if (hdev->channel->rescind) {
+                       tasklet_disable(&hdev->channel->callback_event);
                        dev_warn_once(&hdev->device, "The device is gone.\n");
                        return -ENODEV;
                }

This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can not 
run anymore. What do you think of this?


It looks the list of the other vmbus devices that can be hot-removed is:

the hv_utils devices
hv_sock devices
storvsc device
netvsc device

As I checked, the first 3 types of devices don't have this "send a request to the
host and wait for the response forever" pattern. NetVSC should be fixed as it has
the same pattern.

-- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-23 21:12 ` Dexuan Cui
@ 2018-05-29 21:20   ` Andy Shevchenko
  -1 siblings, 0 replies; 31+ messages in thread
From: Andy Shevchenko @ 2018-05-29 21:20 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: Lorenzo Pieralisi, Bjorn Helgaas, linux-pci, KY Srinivasan,
	Stephen Hemminger, olaf, apw, jasowang, linux-kernel,
	driverdev-devel, Haiyang Zhang, vkuznets, marcelo.cerri

On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com> wrote:
>
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.

> +       while (true) {
> +               if (hdev->channel->rescind) {
> +                       dev_warn_once(&hdev->device, "The device is gone.\n");
> +                       return -ENODEV;
> +               }
> +
> +               if (wait_for_completion_timeout(comp, HZ / 10))
> +                       break;
> +       }

Infinite loops are usually a red flags.

What's wrong with simple:

do {
  ...
} while (wait_...(...) == 0);

?

> +       if (!ret)
> +               ret = wait_for_response(hdev, &comp);

Better to use well established patterns, i.e.

if (ret)
 return ret;

> +               if (!ret)
> +                       ret = wait_for_response(hdev, &comp_pkt.host_event);

Here it looks okay on the first glance, but better to think about it
again and refactor.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29 21:20   ` Andy Shevchenko
  0 siblings, 0 replies; 31+ messages in thread
From: Andy Shevchenko @ 2018-05-29 21:20 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: olaf, Lorenzo Pieralisi, Stephen Hemminger, linux-pci, jasowang,
	driverdev-devel, linux-kernel, apw, marcelo.cerri, Bjorn Helgaas,
	vkuznets, Haiyang Zhang

On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com> wrote:
>
> Before the guest finishes the device initialization, the device can be
> removed anytime by the host, and after that the host won't respond to
> the guest's request, so the guest should be prepared to handle this
> case.

> +       while (true) {
> +               if (hdev->channel->rescind) {
> +                       dev_warn_once(&hdev->device, "The device is gone.\n");
> +                       return -ENODEV;
> +               }
> +
> +               if (wait_for_completion_timeout(comp, HZ / 10))
> +                       break;
> +       }

Infinite loops are usually a red flags.

What's wrong with simple:

do {
  ...
} while (wait_...(...) == 0);

?

> +       if (!ret)
> +               ret = wait_for_response(hdev, &comp);

Better to use well established patterns, i.e.

if (ret)
 return ret;

> +               if (!ret)
> +                       ret = wait_for_response(hdev, &comp_pkt.host_event);

Here it looks okay on the first glance, but better to think about it
again and refactor.

-- 
With Best Regards,
Andy Shevchenko
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-29 21:20   ` Andy Shevchenko
  (?)
@ 2018-05-29 21:28     ` Dexuan Cui
  -1 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 21:28 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Lorenzo Pieralisi, Bjorn Helgaas, linux-pci, KY Srinivasan,
	Stephen Hemminger, olaf, apw, jasowang, linux-kernel,
	driverdev-devel, Haiyang Zhang, vkuznets, marcelo.cerri

> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Tuesday, May 29, 2018 14:21
> On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com>
> wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> 
> > +       while (true) {
> > +               if (hdev->channel->rescind) {
> > +                       dev_warn_once(&hdev->device, "The device is
> gone.\n");
> > +                       return -ENODEV;
> > +               }
> > +
> > +               if (wait_for_completion_timeout(comp, HZ / 10))
> > +                       break;
> > +       }
> 
> Infinite loops are usually a red flags.
> 
> What's wrong with simple:
> 
> do {
>   ...
> } while (wait_...(...) == 0);
> 
> ?
Thanks for the suggestion, Andy!
The coding style you suggested looks better to me. :-)

> > +       if (!ret)
> > +               ret = wait_for_response(hdev, &comp);
> 
> Better to use well established patterns, i.e.
> 
> if (ret)
>  return ret;
Agreed.

> 
> > +               if (!ret)
> > +                       ret = wait_for_response(hdev,
> &comp_pkt.host_event);
> 
> Here it looks okay on the first glance, but better to think about it
> again and refactor.

> With Best Regards,
> Andy Shevchenko

I'll try to send out a patch to improve the coding style, after I address
Michael Kelley's concern of a race.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29 21:28     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 21:28 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Lorenzo Pieralisi, Bjorn Helgaas, linux-pci, KY Srinivasan,
	Stephen Hemminger, olaf, apw, jasowang, linux-kernel,
	driverdev-devel, Haiyang Zhang, vkuznets, marcelo.cerri

PiBGcm9tOiBBbmR5IFNoZXZjaGVua28gPGFuZHkuc2hldmNoZW5rb0BnbWFpbC5jb20+DQo+IFNl
bnQ6IFR1ZXNkYXksIE1heSAyOSwgMjAxOCAxNDoyMQ0KPiBPbiBUaHUsIE1heSAyNCwgMjAxOCBh
dCAxMjoxMiBBTSwgRGV4dWFuIEN1aSA8ZGVjdWlAbWljcm9zb2Z0LmNvbT4NCj4gd3JvdGU6DQo+
ID4NCj4gPiBCZWZvcmUgdGhlIGd1ZXN0IGZpbmlzaGVzIHRoZSBkZXZpY2UgaW5pdGlhbGl6YXRp
b24sIHRoZSBkZXZpY2UgY2FuIGJlDQo+ID4gcmVtb3ZlZCBhbnl0aW1lIGJ5IHRoZSBob3N0LCBh
bmQgYWZ0ZXIgdGhhdCB0aGUgaG9zdCB3b24ndCByZXNwb25kIHRvDQo+ID4gdGhlIGd1ZXN0J3Mg
cmVxdWVzdCwgc28gdGhlIGd1ZXN0IHNob3VsZCBiZSBwcmVwYXJlZCB0byBoYW5kbGUgdGhpcw0K
PiA+IGNhc2UuDQo+IA0KPiA+ICsgICAgICAgd2hpbGUgKHRydWUpIHsNCj4gPiArICAgICAgICAg
ICAgICAgaWYgKGhkZXYtPmNoYW5uZWwtPnJlc2NpbmQpIHsNCj4gPiArICAgICAgICAgICAgICAg
ICAgICAgICBkZXZfd2Fybl9vbmNlKCZoZGV2LT5kZXZpY2UsICJUaGUgZGV2aWNlIGlzDQo+IGdv
bmUuXG4iKTsNCj4gPiArICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gLUVOT0RFVjsNCj4g
PiArICAgICAgICAgICAgICAgfQ0KPiA+ICsNCj4gPiArICAgICAgICAgICAgICAgaWYgKHdhaXRf
Zm9yX2NvbXBsZXRpb25fdGltZW91dChjb21wLCBIWiAvIDEwKSkNCj4gPiArICAgICAgICAgICAg
ICAgICAgICAgICBicmVhazsNCj4gPiArICAgICAgIH0NCj4gDQo+IEluZmluaXRlIGxvb3BzIGFy
ZSB1c3VhbGx5IGEgcmVkIGZsYWdzLg0KPiANCj4gV2hhdCdzIHdyb25nIHdpdGggc2ltcGxlOg0K
PiANCj4gZG8gew0KPiAgIC4uLg0KPiB9IHdoaWxlICh3YWl0Xy4uLiguLi4pID09IDApOw0KPiAN
Cj4gPw0KVGhhbmtzIGZvciB0aGUgc3VnZ2VzdGlvbiwgQW5keSENClRoZSBjb2Rpbmcgc3R5bGUg
eW91IHN1Z2dlc3RlZCBsb29rcyBiZXR0ZXIgdG8gbWUuIDotKQ0KDQo+ID4gKyAgICAgICBpZiAo
IXJldCkNCj4gPiArICAgICAgICAgICAgICAgcmV0ID0gd2FpdF9mb3JfcmVzcG9uc2UoaGRldiwg
JmNvbXApOw0KPiANCj4gQmV0dGVyIHRvIHVzZSB3ZWxsIGVzdGFibGlzaGVkIHBhdHRlcm5zLCBp
LmUuDQo+IA0KPiBpZiAocmV0KQ0KPiAgcmV0dXJuIHJldDsNCkFncmVlZC4NCg0KPiANCj4gPiAr
ICAgICAgICAgICAgICAgaWYgKCFyZXQpDQo+ID4gKyAgICAgICAgICAgICAgICAgICAgICAgcmV0
ID0gd2FpdF9mb3JfcmVzcG9uc2UoaGRldiwNCj4gJmNvbXBfcGt0Lmhvc3RfZXZlbnQpOw0KPiAN
Cj4gSGVyZSBpdCBsb29rcyBva2F5IG9uIHRoZSBmaXJzdCBnbGFuY2UsIGJ1dCBiZXR0ZXIgdG8g
dGhpbmsgYWJvdXQgaXQNCj4gYWdhaW4gYW5kIHJlZmFjdG9yLg0KDQo+IFdpdGggQmVzdCBSZWdh
cmRzLA0KPiBBbmR5IFNoZXZjaGVua28NCg0KSSdsbCB0cnkgdG8gc2VuZCBvdXQgYSBwYXRjaCB0
byBpbXByb3ZlIHRoZSBjb2Rpbmcgc3R5bGUsIGFmdGVyIEkgYWRkcmVzcw0KTWljaGFlbCBLZWxs
ZXkncyBjb25jZXJuIG9mIGEgcmFjZS4NCg0KVGhhbmtzLA0KLS0gRGV4dWFuDQo=

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-29 21:28     ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-29 21:28 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: olaf, Lorenzo Pieralisi, Stephen Hemminger, linux-pci, jasowang,
	driverdev-devel, linux-kernel, apw, marcelo.cerri, Bjorn Helgaas,
	vkuznets, Haiyang Zhang

> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Tuesday, May 29, 2018 14:21
> On Thu, May 24, 2018 at 12:12 AM, Dexuan Cui <decui@microsoft.com>
> wrote:
> >
> > Before the guest finishes the device initialization, the device can be
> > removed anytime by the host, and after that the host won't respond to
> > the guest's request, so the guest should be prepared to handle this
> > case.
> 
> > +       while (true) {
> > +               if (hdev->channel->rescind) {
> > +                       dev_warn_once(&hdev->device, "The device is
> gone.\n");
> > +                       return -ENODEV;
> > +               }
> > +
> > +               if (wait_for_completion_timeout(comp, HZ / 10))
> > +                       break;
> > +       }
> 
> Infinite loops are usually a red flags.
> 
> What's wrong with simple:
> 
> do {
>   ...
> } while (wait_...(...) == 0);
> 
> ?
Thanks for the suggestion, Andy!
The coding style you suggested looks better to me. :-)

> > +       if (!ret)
> > +               ret = wait_for_response(hdev, &comp);
> 
> Better to use well established patterns, i.e.
> 
> if (ret)
>  return ret;
Agreed.

> 
> > +               if (!ret)
> > +                       ret = wait_for_response(hdev,
> &comp_pkt.host_event);
> 
> Here it looks okay on the first glance, but better to think about it
> again and refactor.

> With Best Regards,
> Andy Shevchenko

I'll try to send out a patch to improve the coding style, after I address
Michael Kelley's concern of a race.

Thanks,
-- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-29 19:58     ` Dexuan Cui
  (?)
@ 2018-05-31 16:40       ` Michael Kelley (EOSG)
  -1 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-31 16:40 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Dexuan Cui
> Sent: Tuesday, May 29, 2018 12:58 PM
> Subject: RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
> 
> > From: Michael Kelley (EOSG)
> > Sent: Monday, May 28, 2018 17:19
> >
> > While this patch solves the immediate problem of getting hung waiting
> > for a response from Hyper-V that will never come, there's another scenario
> > to look at that I think introduces a race.  Suppose the guest VM issues a
> > vmbus_sendpacket() request in one of the cases covered by this patch,
> > and suppose that Hyper-V queues a response to the request, and then
> > immediately follows with a rescind request.   Processing the response will
> > get queued to a tasklet associated with the channel, while processing the
> > rescind will get queued to a tasklet associated with the top-level vmbus
> > connection.   From what I can see, the code doesn't impose any ordering
> > on processing the two.  If the rescind is processed first, the new
> > wait_for_response() function may wake up, notice the rescind flag, and
> > return an error.  Its caller will return an error, and in doing so pop the
> > completion packet off the stack.   When the response is processed later,
> > it will try to signal completion via a completion packet that no longer
> > exists, and memory corruption will likely occur.
> >
> > Am I missing anything that would prevent this scenario from happening?
> > It is admittedly low probability, and a solution seems non-trivial.  I haven't
> > looked specifically, but a similar scenario is probably possible with the
> > drivers for other VMbus devices.  We should work on a generic solution.
> >
> > Michael
> 
> Thanks for spotting the race!
> 
> IMO we can disable the per-channel tasklet to exclude the race:
> 
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
>  {
>         while (true) {
>                 if (hdev->channel->rescind) {
> +                       tasklet_disable(&hdev->channel->callback_event);
>                         dev_warn_once(&hdev->device, "The device is gone.\n");
>                         return -ENODEV;
>                 }
> 
> This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can not
> run anymore. What do you think of this?

I've stared at this and the tasklet code over a couple of days now.  Adding the
call to tasklet_disable() solves the immediate problem of preventing 
hv_pci_onchannelcallback() from calling complete() against a completion packet
that has been popped off the stack.  But in doing so, it simply pushes the core
problem further down the road and leaves it unresolved.

tasklet_disable() does not prevent the tasklet from being scheduled.  So if there
is a response from Hyper-V to the original message, the tasklet still gets
scheduled.  Because it is disabled, it will sit in the tasklet queue and be skipped
over each time the queue is processed.  Later,  when the channel is eventually
deleted in free_channel(), tasklet_kill() is called.  Unfortunately, tasklet_kill()
will get stuck in an infinite loop, waiting for the tasklet to run.   There aren't
any tasklet interfaces to dequeue an already scheduled tasklet.

Please double-check my reasoning.  To solve this problem, I think the VMbus
driver code will need some additional synchronization between rescind
notifications and a response, which may or may not have been sent, and
which could be processed after the rescind.  I haven't yet thought about
what this synchronization might need to look like.

Michael

> 
> 
> It looks the list of the other vmbus devices that can be hot-removed is:
> 
> the hv_utils devices
> hv_sock devices
> storvsc device
> netvsc device
> 
> As I checked, the first 3 types of devices don't have this "send a request to the
> host and wait for the response forever" pattern. NetVSC should be fixed as it has
> the same pattern.
> 
> -- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-31 16:40       ` Michael Kelley (EOSG)
  0 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-31 16:40 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Dexuan Cui
> Sent: Tuesday, May 29, 2018 12:58 PM
> Subject: RE: [PATCH] PCI: hv: Do not wait forever on a device that has di=
sappeared
>=20
> > From: Michael Kelley (EOSG)
> > Sent: Monday, May 28, 2018 17:19
> >
> > While this patch solves the immediate problem of getting hung waiting
> > for a response from Hyper-V that will never come, there's another scena=
rio
> > to look at that I think introduces a race.  Suppose the guest VM issues=
 a
> > vmbus_sendpacket() request in one of the cases covered by this patch,
> > and suppose that Hyper-V queues a response to the request, and then
> > immediately follows with a rescind request.   Processing the response w=
ill
> > get queued to a tasklet associated with the channel, while processing t=
he
> > rescind will get queued to a tasklet associated with the top-level vmbu=
s
> > connection.   From what I can see, the code doesn't impose any ordering
> > on processing the two.  If the rescind is processed first, the new
> > wait_for_response() function may wake up, notice the rescind flag, and
> > return an error.  Its caller will return an error, and in doing so pop =
the
> > completion packet off the stack.   When the response is processed later=
,
> > it will try to signal completion via a completion packet that no longer
> > exists, and memory corruption will likely occur.
> >
> > Am I missing anything that would prevent this scenario from happening?
> > It is admittedly low probability, and a solution seems non-trivial.  I =
haven't
> > looked specifically, but a similar scenario is probably possible with t=
he
> > drivers for other VMbus devices.  We should work on a generic solution.
> >
> > Michael
>=20
> Thanks for spotting the race!
>=20
> IMO we can disable the per-channel tasklet to exclude the race:
>=20
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
>  {
>         while (true) {
>                 if (hdev->channel->rescind) {
> +                       tasklet_disable(&hdev->channel->callback_event);
>                         dev_warn_once(&hdev->device, "The device is gone.=
\n");
>                         return -ENODEV;
>                 }
>=20
> This way, when we exit the loop, we're sure hv_pci_onchannelcallback() ca=
n not
> run anymore. What do you think of this?

I've stared at this and the tasklet code over a couple of days now.  Adding=
 the
call to tasklet_disable() solves the immediate problem of preventing=20
hv_pci_onchannelcallback() from calling complete() against a completion pac=
ket
that has been popped off the stack.  But in doing so, it simply pushes the =
core
problem further down the road and leaves it unresolved.

tasklet_disable() does not prevent the tasklet from being scheduled.  So if=
 there
is a response from Hyper-V to the original message, the tasklet still gets
scheduled.  Because it is disabled, it will sit in the tasklet queue and be=
 skipped
over each time the queue is processed.  Later,  when the channel is eventua=
lly
deleted in free_channel(), tasklet_kill() is called.  Unfortunately, taskle=
t_kill()
will get stuck in an infinite loop, waiting for the tasklet to run.   There=
 aren't
any tasklet interfaces to dequeue an already scheduled tasklet.

Please double-check my reasoning.  To solve this problem, I think the VMbus
driver code will need some additional synchronization between rescind
notifications and a response, which may or may not have been sent, and
which could be processed after the rescind.  I haven't yet thought about
what this synchronization might need to look like.

Michael

>=20
>=20
> It looks the list of the other vmbus devices that can be hot-removed is:
>=20
> the hv_utils devices
> hv_sock devices
> storvsc device
> netvsc device
>=20
> As I checked, the first 3 types of devices don't have this "send a reques=
t to the
> host and wait for the response forever" pattern. NetVSC should be fixed a=
s it has
> the same pattern.
>=20
> -- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-31 16:40       ` Michael Kelley (EOSG)
  0 siblings, 0 replies; 31+ messages in thread
From: Michael Kelley (EOSG) @ 2018-05-31 16:40 UTC (permalink / raw)
  To: Dexuan Cui, 'Lorenzo Pieralisi', 'Bjorn Helgaas',
	'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	Haiyang Zhang, 'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'

> From: Dexuan Cui
> Sent: Tuesday, May 29, 2018 12:58 PM
> Subject: RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
> 
> > From: Michael Kelley (EOSG)
> > Sent: Monday, May 28, 2018 17:19
> >
> > While this patch solves the immediate problem of getting hung waiting
> > for a response from Hyper-V that will never come, there's another scenario
> > to look at that I think introduces a race.  Suppose the guest VM issues a
> > vmbus_sendpacket() request in one of the cases covered by this patch,
> > and suppose that Hyper-V queues a response to the request, and then
> > immediately follows with a rescind request.   Processing the response will
> > get queued to a tasklet associated with the channel, while processing the
> > rescind will get queued to a tasklet associated with the top-level vmbus
> > connection.   From what I can see, the code doesn't impose any ordering
> > on processing the two.  If the rescind is processed first, the new
> > wait_for_response() function may wake up, notice the rescind flag, and
> > return an error.  Its caller will return an error, and in doing so pop the
> > completion packet off the stack.   When the response is processed later,
> > it will try to signal completion via a completion packet that no longer
> > exists, and memory corruption will likely occur.
> >
> > Am I missing anything that would prevent this scenario from happening?
> > It is admittedly low probability, and a solution seems non-trivial.  I haven't
> > looked specifically, but a similar scenario is probably possible with the
> > drivers for other VMbus devices.  We should work on a generic solution.
> >
> > Michael
> 
> Thanks for spotting the race!
> 
> IMO we can disable the per-channel tasklet to exclude the race:
> 
> --- a/drivers/pci/host/pci-hyperv.c
> +++ b/drivers/pci/host/pci-hyperv.c
> @@ -565,6 +565,7 @@ static int wait_for_response(struct hv_device *hdev,
>  {
>         while (true) {
>                 if (hdev->channel->rescind) {
> +                       tasklet_disable(&hdev->channel->callback_event);
>                         dev_warn_once(&hdev->device, "The device is gone.\n");
>                         return -ENODEV;
>                 }
> 
> This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can not
> run anymore. What do you think of this?

I've stared at this and the tasklet code over a couple of days now.  Adding the
call to tasklet_disable() solves the immediate problem of preventing 
hv_pci_onchannelcallback() from calling complete() against a completion packet
that has been popped off the stack.  But in doing so, it simply pushes the core
problem further down the road and leaves it unresolved.

tasklet_disable() does not prevent the tasklet from being scheduled.  So if there
is a response from Hyper-V to the original message, the tasklet still gets
scheduled.  Because it is disabled, it will sit in the tasklet queue and be skipped
over each time the queue is processed.  Later,  when the channel is eventually
deleted in free_channel(), tasklet_kill() is called.  Unfortunately, tasklet_kill()
will get stuck in an infinite loop, waiting for the tasklet to run.   There aren't
any tasklet interfaces to dequeue an already scheduled tasklet.

Please double-check my reasoning.  To solve this problem, I think the VMbus
driver code will need some additional synchronization between rescind
notifications and a response, which may or may not have been sent, and
which could be processed after the rescind.  I haven't yet thought about
what this synchronization might need to look like.

Michael

> 
> 
> It looks the list of the other vmbus devices that can be hot-removed is:
> 
> the hv_utils devices
> hv_sock devices
> storvsc device
> netvsc device
> 
> As I checked, the first 3 types of devices don't have this "send a request to the
> host and wait for the response forever" pattern. NetVSC should be fixed as it has
> the same pattern.
> 
> -- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
  2018-05-31 16:40       ` Michael Kelley (EOSG)
  (?)
@ 2018-05-31 21:01         ` Dexuan Cui
  -1 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-31 21:01 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Michael Kelley (EOSG)
> Sent: Thursday, May 31, 2018 09:41
> >
> > IMO we can disable the per-channel tasklet to exclude the race:
> > This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can
> > not run anymore. What do you think of this?
> 
> I've stared at this and the tasklet code over a couple of days now.  Adding the
> call to tasklet_disable() solves the immediate problem of preventing
> hv_pci_onchannelcallback() from calling complete() against a completion
> packet
> that has been popped off the stack.  But in doing so, it simply pushes the core
> problem further down the road and leaves it unresolved.
> 
> tasklet_disable() does not prevent the tasklet from being scheduled.  So if
> there
> is a response from Hyper-V to the original message, the tasklet still gets
> scheduled.  Because it is disabled, it will sit in the tasklet queue and be
> skipped
> over each time the queue is processed.  Later,  when the channel is
> eventually
> deleted in free_channel(), tasklet_kill() is called.  Unfortunately, tasklet_kill()
> will get stuck in an infinite loop, waiting for the tasklet to run.   There aren't
> any tasklet interfaces to dequeue an already scheduled tasklet.

I think you're correct.
 
> Please double-check my reasoning.  To solve this problem, I think the VMbus
> driver code will need some additional synchronization between rescind
> notifications and a response, which may or may not have been sent, and
> which could be processed after the rescind.  I haven't yet thought about
> what this synchronization might need to look like.
> 
> Michael

Yes, it looks the VMBus driver needs to provide an API to cope with this.
I'll try to further investigate the issue.

-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-31 21:01         ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-31 21:01 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'linux-kernel@vger.kernel.org',
	'driverdev-devel@linuxdriverproject.org',
	Haiyang Zhang, 'vkuznets@redhat.com',
	'marcelo.cerri@canonical.com'

> From: Michael Kelley (EOSG)
> Sent: Thursday, May 31, 2018 09:41
> >
> > IMO we can disable the per-channel tasklet to exclude the race:
> > This way, when we exit the loop, we're sure hv_pci_onchannelcallback() =
can
> > not run anymore. What do you think of this?
>=20
> I've stared at this and the tasklet code over a couple of days now.  Addi=
ng the
> call to tasklet_disable() solves the immediate problem of preventing
> hv_pci_onchannelcallback() from calling complete() against a completion
> packet
> that has been popped off the stack.  But in doing so, it simply pushes th=
e core
> problem further down the road and leaves it unresolved.
>=20
> tasklet_disable() does not prevent the tasklet from being scheduled.  So =
if
> there
> is a response from Hyper-V to the original message, the tasklet still get=
s
> scheduled.  Because it is disabled, it will sit in the tasklet queue and =
be
> skipped
> over each time the queue is processed.  Later,  when the channel is
> eventually
> deleted in free_channel(), tasklet_kill() is called.  Unfortunately, task=
let_kill()
> will get stuck in an infinite loop, waiting for the tasklet to run.   The=
re aren't
> any tasklet interfaces to dequeue an already scheduled tasklet.

I think you're correct.
=20
> Please double-check my reasoning.  To solve this problem, I think the VMb=
us
> driver code will need some additional synchronization between rescind
> notifications and a response, which may or may not have been sent, and
> which could be processed after the rescind.  I haven't yet thought about
> what this synchronization might need to look like.
>=20
> Michael

Yes, it looks the VMBus driver needs to provide an API to cope with this.
I'll try to further investigate the issue.

-- Dexuan

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: [PATCH] PCI: hv: Do not wait forever on a device that has disappeared
@ 2018-05-31 21:01         ` Dexuan Cui
  0 siblings, 0 replies; 31+ messages in thread
From: Dexuan Cui @ 2018-05-31 21:01 UTC (permalink / raw)
  To: Michael Kelley (EOSG), 'Lorenzo Pieralisi',
	'Bjorn Helgaas', 'linux-pci@vger.kernel.org',
	KY Srinivasan, Stephen Hemminger, 'olaf@aepfle.de',
	'apw@canonical.com', 'jasowang@redhat.com'
  Cc: 'marcelo.cerri@canonical.com',
	'vkuznets@redhat.com',
	Haiyang Zhang, 'driverdev-devel@linuxdriverproject.org',
	'linux-kernel@vger.kernel.org'

> From: Michael Kelley (EOSG)
> Sent: Thursday, May 31, 2018 09:41
> >
> > IMO we can disable the per-channel tasklet to exclude the race:
> > This way, when we exit the loop, we're sure hv_pci_onchannelcallback() can
> > not run anymore. What do you think of this?
> 
> I've stared at this and the tasklet code over a couple of days now.  Adding the
> call to tasklet_disable() solves the immediate problem of preventing
> hv_pci_onchannelcallback() from calling complete() against a completion
> packet
> that has been popped off the stack.  But in doing so, it simply pushes the core
> problem further down the road and leaves it unresolved.
> 
> tasklet_disable() does not prevent the tasklet from being scheduled.  So if
> there
> is a response from Hyper-V to the original message, the tasklet still gets
> scheduled.  Because it is disabled, it will sit in the tasklet queue and be
> skipped
> over each time the queue is processed.  Later,  when the channel is
> eventually
> deleted in free_channel(), tasklet_kill() is called.  Unfortunately, tasklet_kill()
> will get stuck in an infinite loop, waiting for the tasklet to run.   There aren't
> any tasklet interfaces to dequeue an already scheduled tasklet.

I think you're correct.
 
> Please double-check my reasoning.  To solve this problem, I think the VMbus
> driver code will need some additional synchronization between rescind
> notifications and a response, which may or may not have been sent, and
> which could be processed after the rescind.  I haven't yet thought about
> what this synchronization might need to look like.
> 
> Michael

Yes, it looks the VMBus driver needs to provide an API to cope with this.
I'll try to further investigate the issue.

-- Dexuan
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2018-05-31 21:16 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-23 21:12 [PATCH] PCI: hv: Do not wait forever on a device that has disappeared Dexuan Cui
2018-05-23 21:12 ` Dexuan Cui
2018-05-24 12:41 ` Lorenzo Pieralisi
2018-05-24 12:41   ` Lorenzo Pieralisi
2018-05-24 23:55   ` Dexuan Cui
2018-05-24 23:55     ` Dexuan Cui
2018-05-24 23:55     ` Dexuan Cui
2018-05-25 10:29     ` Lorenzo Pieralisi
2018-05-25 10:29       ` Lorenzo Pieralisi
2018-05-25 11:43 ` Haiyang Zhang
2018-05-25 11:43   ` Haiyang Zhang
2018-05-25 11:43   ` Haiyang Zhang
2018-05-25 13:56 ` Lorenzo Pieralisi
2018-05-25 13:56   ` Lorenzo Pieralisi
2018-05-29  0:19 ` Michael Kelley (EOSG)
2018-05-29  0:19   ` Michael Kelley (EOSG)
2018-05-29  0:19   ` Michael Kelley (EOSG)
2018-05-29 19:58   ` Dexuan Cui
2018-05-29 19:58     ` Dexuan Cui
2018-05-29 19:58     ` Dexuan Cui
2018-05-31 16:40     ` Michael Kelley (EOSG)
2018-05-31 16:40       ` Michael Kelley (EOSG)
2018-05-31 16:40       ` Michael Kelley (EOSG)
2018-05-31 21:01       ` Dexuan Cui
2018-05-31 21:01         ` Dexuan Cui
2018-05-31 21:01         ` Dexuan Cui
2018-05-29 21:20 ` Andy Shevchenko
2018-05-29 21:20   ` Andy Shevchenko
2018-05-29 21:28   ` Dexuan Cui
2018-05-29 21:28     ` Dexuan Cui
2018-05-29 21:28     ` Dexuan Cui

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.