Linux-HyperV Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes
@ 2019-11-25  5:33 Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Dexuan Cui @ 2019-11-25  5:33 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

I suggest the patchset goes through the pci.git tree.

Patch #1: no functional change.
Patch #2 enhances the pci-hyperv driver to support hibernation.
Patch #3 is unrelated to hibernation.
Patch #4 is unrelated to hibernation.

Changes in v3:
Patch #1: Added Michael Kelley's Signed-off-by.
Patch #2: Used a better commit log message from Michael Kelley.
Patch #3: Added Michael Kelley's Signed-off-by.
Patch #4: Used kzalloc() rather than get_zeroed_page()/kmemleak_alloc()/
          kmemleak_free(), and added the necessary comments.

Michael, can you please review #2 and #4 again?

Dexuan Cui (4):
  PCI: hv: Reorganize the code in preparation of hibernation
  PCI: hv: Add the support of hibernation
  PCI: hv: Change pci_protocol_version to per-hbus
  PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer

 drivers/pci/controller/pci-hyperv.c | 208 ++++++++++++++++++++++++----
 1 file changed, 179 insertions(+), 29 deletions(-)

-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 1/4] PCI: hv: Reorganize the code in preparation of hibernation
  2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
@ 2019-11-25  5:33 ` Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 2/4] PCI: hv: Add the support " Dexuan Cui
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Dexuan Cui @ 2019-11-25  5:33 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

There is no functional change. This is just preparatory to a later
patch which adds the hibernation support for the pci-hyperv driver.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 43 +++++++++++++++++++----------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index f1f300218fab..65f18f840ce9 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2379,7 +2379,9 @@ static void hv_pci_onchannelcallback(void *context)
  * failing if the host doesn't support the necessary protocol
  * level.
  */
-static int hv_pci_protocol_negotiation(struct hv_device *hdev)
+static int hv_pci_protocol_negotiation(struct hv_device *hdev,
+				       enum pci_protocol_version_t version[],
+				       int num_version)
 {
 	struct pci_version_request *version_req;
 	struct hv_pci_compl comp_pkt;
@@ -2403,8 +2405,8 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 	version_req = (struct pci_version_request *)&pkt->message;
 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
 
-	for (i = 0; i < ARRAY_SIZE(pci_protocol_versions); i++) {
-		version_req->protocol_version = pci_protocol_versions[i];
+	for (i = 0; i < num_version; i++) {
+		version_req->protocol_version = version[i];
 		ret = vmbus_sendpacket(hdev->channel, version_req,
 				sizeof(struct pci_version_request),
 				(unsigned long)pkt, VM_PKT_DATA_INBAND,
@@ -2420,7 +2422,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 		}
 
 		if (comp_pkt.completion_status >= 0) {
-			pci_protocol_version = pci_protocol_versions[i];
+			pci_protocol_version = version[i];
 			dev_info(&hdev->device,
 				"PCI VMBus probing: Using version %#x\n",
 				pci_protocol_version);
@@ -2930,7 +2932,8 @@ static int hv_pci_probe(struct hv_device *hdev,
 
 	hv_set_drvdata(hdev, hbus);
 
-	ret = hv_pci_protocol_negotiation(hdev);
+	ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
+					  ARRAY_SIZE(pci_protocol_versions));
 	if (ret)
 		goto close;
 
@@ -3011,7 +3014,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	return ret;
 }
 
-static void hv_pci_bus_exit(struct hv_device *hdev)
+static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating)
 {
 	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
 	struct {
@@ -3027,16 +3030,20 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 	 * access the per-channel ringbuffer any longer.
 	 */
 	if (hdev->channel->rescind)
-		return;
+		return 0;
 
-	/* Delete any children which might still exist. */
-	memset(&relations, 0, sizeof(relations));
-	hv_pci_devices_present(hbus, &relations);
+	if (!hibernating) {
+		/* Delete any children which might still exist. */
+		memset(&relations, 0, sizeof(relations));
+		hv_pci_devices_present(hbus, &relations);
+	}
 
 	ret = hv_send_resources_released(hdev);
-	if (ret)
+	if (ret) {
 		dev_err(&hdev->device,
 			"Couldn't send resources released packet(s)\n");
+		return ret;
+	}
 
 	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
 	init_completion(&comp_pkt.host_event);
@@ -3049,8 +3056,13 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 			       (unsigned long)&pkt.teardown_packet,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
-	if (!ret)
-		wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ);
+	if (ret)
+		return ret;
+
+	if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0)
+		return -ETIMEDOUT;
+
+	return 0;
 }
 
 /**
@@ -3062,6 +3074,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 static int hv_pci_remove(struct hv_device *hdev)
 {
 	struct hv_pcibus_device *hbus;
+	int ret;
 
 	hbus = hv_get_drvdata(hdev);
 	if (hbus->state == hv_pcibus_installed) {
@@ -3074,7 +3087,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 		hbus->state = hv_pcibus_removed;
 	}
 
-	hv_pci_bus_exit(hdev);
+	ret = hv_pci_bus_exit(hdev, false);
 
 	vmbus_close(hdev->channel);
 
@@ -3091,7 +3104,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 	hv_put_dom_num(hbus->sysdata.domain);
 
 	free_page((unsigned long)hbus);
-	return 0;
+	return ret;
 }
 
 static const struct hv_vmbus_device_id hv_pci_id_table[] = {
-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 2/4] PCI: hv: Add the support of hibernation
  2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
@ 2019-11-25  5:33 ` " Dexuan Cui
  2019-11-25 22:49   ` Michael Kelley
  2019-11-25  5:33 ` [PATCH v3 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 8+ messages in thread
From: Dexuan Cui @ 2019-11-25  5:33 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

Add suspend() and resume() functions so that Hyper-V virtual PCI devices are
handled properly when the VM hibernates and resumes from hibernation.

Note that the suspend() function must make sure there are no pending work
items before calling vmbus_close(), since it runs in a process context as a
callback in dpm_suspend().  When it starts to run, the channel callback
hv_pci_onchannelcallback(), which runs in a tasklet context, can be still running
concurrently and scheduling new work items onto hbus->wq in
hv_pci_devices_present() and hv_pci_eject_device(), and the work item
handlers can access the vmbus channel, which can be being closed by
hv_pci_suspend(), e.g. the work item handler pci_devices_present_work() ->
new_pcichild_device() writes to the vmbus channel.

To eliminate the race, hv_pci_suspend() disables the channel callback
tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
This way, when hv_pci_suspend() proceeds, it knows that no new work item
can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
channel.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 125 +++++++++++++++++++++++++++-
 1 file changed, 123 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 65f18f840ce9..3c4996b073ca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -455,6 +455,7 @@ enum hv_pcibus_state {
 	hv_pcibus_init = 0,
 	hv_pcibus_probed,
 	hv_pcibus_installed,
+	hv_pcibus_removing,
 	hv_pcibus_removed,
 	hv_pcibus_maximum
 };
@@ -1681,6 +1682,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
 
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 
+	/*
+	 * Clear the memory enable bit, in case it's already set. This occurs
+	 * in the suspend path of hibernation, where the device is suspended,
+	 * resumed and suspended again: see hibernation_snapshot() and
+	 * hibernation_platform_enter().
+	 *
+	 * If the memory enable bit is already set, Hyper-V sliently ignores
+	 * the below BAR updates, and the related PCI device driver can not
+	 * work, because reading from the device register(s) always returns
+	 * 0xFFFFFFFF.
+	 */
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
+		command &= ~PCI_COMMAND_MEMORY;
+		_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
+	}
+
 	/* Pick addresses for the BARs. */
 	do {
 		list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2107,6 +2125,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 	unsigned long flags;
 	bool pending_dr;
 
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hbus->hdev->device,
+			 "PCI VMBus BUS_RELATIONS: ignored\n");
+		return;
+	}
+
 	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
 	if (!dr_wrk)
 		return;
@@ -2223,11 +2247,19 @@ static void hv_eject_device_work(struct work_struct *work)
  */
 static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
 {
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct hv_device *hdev = hbus->hdev;
+
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
+		return;
+	}
+
 	hpdev->state = hv_pcichild_ejecting;
 	get_pcichild(hpdev);
 	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
-	get_hvpcibus(hpdev->hbus);
-	queue_work(hpdev->hbus->wq, &hpdev->wrk);
+	get_hvpcibus(hbus);
+	queue_work(hbus->wq, &hpdev->wrk);
 }
 
 /**
@@ -3107,6 +3139,93 @@ static int hv_pci_remove(struct hv_device *hdev)
 	return ret;
 }
 
+static int hv_pci_suspend(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum hv_pcibus_state old_state;
+	int ret;
+
+	/*
+	 * hv_pci_suspend() must make sure there are no pending work items
+	 * before calling vmbus_close(), since it runs in a process context
+	 * as a callback in dpm_suspend().  When it starts to run, the channel
+	 * callback hv_pci_onchannelcallback(), which runs in a tasklet
+	 * context, can be still running concurrently and scheduling new work
+	 * items onto hbus->wq in hv_pci_devices_present() and
+	 * hv_pci_eject_device(), and the work item handlers can access the
+	 * vmbus channel, which can be being closed by hv_pci_suspend(), e.g.
+	 * the work item handler pci_devices_present_work() ->
+	 * new_pcichild_device() writes to the vmbus channel.
+	 *
+	 * To eliminate the race, hv_pci_suspend() disables the channel
+	 * callback tasklet, sets hbus->state to hv_pcibus_removing, and
+	 * re-enables the tasklet. This way, when hv_pci_suspend() proceeds,
+	 * it knows that no new work item can be scheduled, and then it flushes
+	 * hbus->wq and safely closes the vmbus channel.
+	 */
+	tasklet_disable(&hdev->channel->callback_event);
+
+	/* Change the hbus state to prevent new work items. */
+	old_state = hbus->state;
+	if (hbus->state == hv_pcibus_installed)
+		hbus->state = hv_pcibus_removing;
+
+	tasklet_enable(&hdev->channel->callback_event);
+
+	if (old_state != hv_pcibus_installed)
+		return -EINVAL;
+
+	flush_workqueue(hbus->wq);
+
+	ret = hv_pci_bus_exit(hdev, true);
+	if (ret)
+		return ret;
+
+	vmbus_close(hdev->channel);
+
+	return 0;
+}
+
+static int hv_pci_resume(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum pci_protocol_version_t version[1];
+	int ret;
+
+	hbus->state = hv_pcibus_init;
+
+	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
+			 hv_pci_onchannelcallback, hbus);
+	if (ret)
+		return ret;
+
+	/* Only use the version that was in use before hibernation. */
+	version[0] = pci_protocol_version;
+	ret = hv_pci_protocol_negotiation(hdev, version, 1);
+	if (ret)
+		goto out;
+
+	ret = hv_pci_query_relations(hdev);
+	if (ret)
+		goto out;
+
+	ret = hv_pci_enter_d0(hdev);
+	if (ret)
+		goto out;
+
+	ret = hv_send_resources_allocated(hdev);
+	if (ret)
+		goto out;
+
+	prepopulate_bars(hbus);
+
+	hbus->state = hv_pcibus_installed;
+	return 0;
+out:
+	vmbus_close(hdev->channel);
+	return ret;
+}
+
 static const struct hv_vmbus_device_id hv_pci_id_table[] = {
 	/* PCI Pass-through Class ID */
 	/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
@@ -3121,6 +3240,8 @@ static struct hv_driver hv_pci_drv = {
 	.id_table	= hv_pci_id_table,
 	.probe		= hv_pci_probe,
 	.remove		= hv_pci_remove,
+	.suspend	= hv_pci_suspend,
+	.resume		= hv_pci_resume,
 };
 
 static void __exit exit_hv_pci_drv(void)
-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 3/4] PCI: hv: Change pci_protocol_version to per-hbus
  2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 2/4] PCI: hv: Add the support " Dexuan Cui
@ 2019-11-25  5:33 ` Dexuan Cui
  2019-11-25  5:33 ` [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer Dexuan Cui
  2019-11-26 10:46 ` [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Lorenzo Pieralisi
  4 siblings, 0 replies; 8+ messages in thread
From: Dexuan Cui @ 2019-11-25  5:33 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

A VM can have multiple Hyper-V hbus. It's incorrect to set the global
variable 'pci_protocol_version' when *every* hbus is initialized in
hv_pci_protocol_negotiation(). This is not an issue in practice since
every hbus should have the same value of hbus->protocol_version, but
we should make the variable per-hbus, so in case we have busses
with different protocol_versions, the driver can still work correctly.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 3c4996b073ca..910fa016d095 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -76,11 +76,6 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
 	PCI_PROTOCOL_VERSION_1_1,
 };
 
-/*
- * Protocol version negotiated by hv_pci_protocol_negotiation().
- */
-static enum pci_protocol_version_t pci_protocol_version;
-
 #define PCI_CONFIG_MMIO_LENGTH	0x2000
 #define CFG_PAGE_OFFSET 0x1000
 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
@@ -462,6 +457,8 @@ enum hv_pcibus_state {
 
 struct hv_pcibus_device {
 	struct pci_sysdata sysdata;
+	/* Protocol version negotiated with the host */
+	enum pci_protocol_version_t protocol_version;
 	enum hv_pcibus_state state;
 	refcount_t remove_lock;
 	struct hv_device *hdev;
@@ -1225,7 +1222,7 @@ static void hv_irq_unmask(struct irq_data *data)
 	 * negative effect (yet?).
 	 */
 
-	if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
+	if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
 		/*
 		 * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
 		 * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
@@ -1395,7 +1392,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
 	ctxt.pci_pkt.compl_ctxt = &comp;
 
-	switch (pci_protocol_version) {
+	switch (hbus->protocol_version) {
 	case PCI_PROTOCOL_VERSION_1_1:
 		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
 					dest,
@@ -2415,6 +2412,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 				       enum pci_protocol_version_t version[],
 				       int num_version)
 {
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
 	struct pci_version_request *version_req;
 	struct hv_pci_compl comp_pkt;
 	struct pci_packet *pkt;
@@ -2454,10 +2452,10 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 		}
 
 		if (comp_pkt.completion_status >= 0) {
-			pci_protocol_version = version[i];
+			hbus->protocol_version = version[i];
 			dev_info(&hdev->device,
 				"PCI VMBus probing: Using version %#x\n",
-				pci_protocol_version);
+				hbus->protocol_version);
 			goto exit;
 		}
 
@@ -2741,7 +2739,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 	u32 wslot;
 	int ret;
 
-	size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2)
+	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
 
 	pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
@@ -2760,7 +2758,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 		pkt->completion_func = hv_pci_generic_compl;
 		pkt->compl_ctxt = &comp_pkt;
 
-		if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) {
+		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) {
 			res_assigned =
 				(struct pci_resources_assigned *)&pkt->message;
 			res_assigned->message_type.type =
@@ -3200,7 +3198,7 @@ static int hv_pci_resume(struct hv_device *hdev)
 		return ret;
 
 	/* Only use the version that was in use before hibernation. */
-	version[0] = pci_protocol_version;
+	version[0] = hbus->protocol_version;
 	ret = hv_pci_protocol_negotiation(hdev, version, 1);
 	if (ret)
 		goto out;
-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer
  2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
                   ` (2 preceding siblings ...)
  2019-11-25  5:33 ` [PATCH v3 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
@ 2019-11-25  5:33 ` Dexuan Cui
  2019-11-25 22:52   ` Michael Kelley
  2019-11-26 10:46 ` [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Lorenzo Pieralisi
  4 siblings, 1 reply; 8+ messages in thread
From: Dexuan Cui @ 2019-11-25  5:33 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
alignment of hbus is important because hbus's field
retarget_msi_interrupt_params must not cross a 4KB page boundary.

Here we prefer kzalloc to get_zeroed_page(), because a buffer
allocated by the latter is not tracked and scanned by kmemleak, and
hence kmemleak reports the pointer contained in the hbus buffer
(i.e. the hpdev struct, which is created in new_pcichild_device() and
is tracked by hbus->children) as memory leak (false positive).

If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
used to allocate the hbus buffer and we can avoid the kmemleak false
positive by using kmemleak_alloc() and kmemleak_free() to ask
kmemleak to track and scan the hbus buffer.

Reported-by: Lili Deng <v-lide@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 910fa016d095..be99862166d0 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2902,9 +2902,27 @@ static int hv_pci_probe(struct hv_device *hdev,
 	 * hv_pcibus_device contains the hypercall arguments for retargeting in
 	 * hv_irq_unmask(). Those must not cross a page boundary.
 	 */
-	BUILD_BUG_ON(sizeof(*hbus) > PAGE_SIZE);
+	BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE);
 
-	hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL);
+	/*
+	 * With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
+	 * alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
+	 * a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
+	 * alignment of hbus is important because hbus's field
+	 * retarget_msi_interrupt_params must not cross a 4KB page boundary.
+	 *
+	 * Here we prefer kzalloc to get_zeroed_page(), because a buffer
+	 * allocated by the latter is not tracked and scanned by kmemleak, and
+	 * hence kmemleak reports the pointer contained in the hbus buffer
+	 * (i.e. the hpdev struct, which is created in new_pcichild_device() and
+	 * is tracked by hbus->children) as memory leak (false positive).
+	 *
+	 * If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
+	 * used to allocate the hbus buffer and we can avoid the kmemleak false
+	 * positive by using kmemleak_alloc() and kmemleak_free() to ask
+	 * kmemleak to track and scan the hbus buffer.
+	 */
+	hbus = (struct hv_pcibus_device *)kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
 	if (!hbus)
 		return -ENOMEM;
 	hbus->state = hv_pcibus_init;
@@ -3133,7 +3151,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 
 	hv_put_dom_num(hbus->sysdata.domain);
 
-	free_page((unsigned long)hbus);
+	kfree(hbus);
 	return ret;
 }
 
-- 
2.19.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 2/4] PCI: hv: Add the support of hibernation
  2019-11-25  5:33 ` [PATCH v3 2/4] PCI: hv: Add the support " Dexuan Cui
@ 2019-11-25 22:49   ` Michael Kelley
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Kelley @ 2019-11-25 22:49 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, lorenzo.pieralisi, bhelgaas, linux-hyperv, linux-pci,
	linux-kernel, Sasha Levin

From: Dexuan Cui <decui@microsoft.com> Sent: Sunday, November 24, 2019 9:34 PM
> 
> Add suspend() and resume() functions so that Hyper-V virtual PCI devices are
> handled properly when the VM hibernates and resumes from hibernation.
> 
> Note that the suspend() function must make sure there are no pending work
> items before calling vmbus_close(), since it runs in a process context as a
> callback in dpm_suspend().  When it starts to run, the channel callback
> hv_pci_onchannelcallback(), which runs in a tasklet context, can be still running
> concurrently and scheduling new work items onto hbus->wq in
> hv_pci_devices_present() and hv_pci_eject_device(), and the work item
> handlers can access the vmbus channel, which can be being closed by
> hv_pci_suspend(), e.g. the work item handler pci_devices_present_work() ->
> new_pcichild_device() writes to the vmbus channel.
> 
> To eliminate the race, hv_pci_suspend() disables the channel callback
> tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
> This way, when hv_pci_suspend() proceeds, it knows that no new work item
> can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
> channel.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer
  2019-11-25  5:33 ` [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer Dexuan Cui
@ 2019-11-25 22:52   ` Michael Kelley
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Kelley @ 2019-11-25 22:52 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, lorenzo.pieralisi, bhelgaas, linux-hyperv, linux-pci,
	linux-kernel, Sasha Levin

From: Dexuan Cui <decui@microsoft.com> Sent: Sunday, November 24, 2019 9:34 PM
> 
> With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
> alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
> a 4KB buffer that is guaranteed to be 4KB-aligned. Here the size and
> alignment of hbus is important because hbus's field
> retarget_msi_interrupt_params must not cross a 4KB page boundary.
> 
> Here we prefer kzalloc to get_zeroed_page(), because a buffer
> allocated by the latter is not tracked and scanned by kmemleak, and
> hence kmemleak reports the pointer contained in the hbus buffer
> (i.e. the hpdev struct, which is created in new_pcichild_device() and
> is tracked by hbus->children) as memory leak (false positive).
> 
> If the kernel doesn't have 59bb47985c1d, get_zeroed_page() *must* be
> used to allocate the hbus buffer and we can avoid the kmemleak false
> positive by using kmemleak_alloc() and kmemleak_free() to ask
> kmemleak to track and scan the hbus buffer.
> 
> Reported-by: Lili Deng <v-lide@microsoft.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes
  2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
                   ` (3 preceding siblings ...)
  2019-11-25  5:33 ` [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer Dexuan Cui
@ 2019-11-26 10:46 ` Lorenzo Pieralisi
  4 siblings, 0 replies; 8+ messages in thread
From: Lorenzo Pieralisi @ 2019-11-26 10:46 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, sthemmin, sashal, bhelgaas, linux-hyperv,
	linux-pci, linux-kernel, mikelley, Alexander.Levin

On Sun, Nov 24, 2019 at 09:33:50PM -0800, Dexuan Cui wrote:
> I suggest the patchset goes through the pci.git tree.
> 
> Patch #1: no functional change.
> Patch #2 enhances the pci-hyperv driver to support hibernation.
> Patch #3 is unrelated to hibernation.
> Patch #4 is unrelated to hibernation.
> 
> Changes in v3:
> Patch #1: Added Michael Kelley's Signed-off-by.
> Patch #2: Used a better commit log message from Michael Kelley.
> Patch #3: Added Michael Kelley's Signed-off-by.
> Patch #4: Used kzalloc() rather than get_zeroed_page()/kmemleak_alloc()/
>           kmemleak_free(), and added the necessary comments.
> 
> Michael, can you please review #2 and #4 again?
> 
> Dexuan Cui (4):
>   PCI: hv: Reorganize the code in preparation of hibernation
>   PCI: hv: Add the support of hibernation
>   PCI: hv: Change pci_protocol_version to per-hbus
>   PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer
> 
>  drivers/pci/controller/pci-hyperv.c | 208 ++++++++++++++++++++++++----
>  1 file changed, 179 insertions(+), 29 deletions(-)

Applied to pci/hv, should be able to hit v5.5, thanks.

Lorenzo

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-25  5:33 [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
2019-11-25  5:33 ` [PATCH v3 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
2019-11-25  5:33 ` [PATCH v3 2/4] PCI: hv: Add the support " Dexuan Cui
2019-11-25 22:49   ` Michael Kelley
2019-11-25  5:33 ` [PATCH v3 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
2019-11-25  5:33 ` [PATCH v3 4/4] PCI: hv: Avoid a kmemleak false positive caused by the hbus buffer Dexuan Cui
2019-11-25 22:52   ` Michael Kelley
2019-11-26 10:46 ` [PATCH v3 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Lorenzo Pieralisi

Linux-HyperV Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-hyperv/0 linux-hyperv/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-hyperv linux-hyperv/ https://lore.kernel.org/linux-hyperv \
		linux-hyperv@vger.kernel.org
	public-inbox-index linux-hyperv

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-hyperv


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git