Linux-HyperV Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes
@ 2019-11-20  7:16 Dexuan Cui
  2019-11-20  7:16 ` [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Dexuan Cui @ 2019-11-20  7:16 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

I suggest the patchset goes through the pci.git tree.

Patch #1: no functional change.
Patch #2 enhances the pci-hyperv driver to support hibernation.
Patch #3 is unrelated to hibernation.
Patch #4 is unrelated to hibernation.

Dexuan Cui (4):
  PCI: hv: Reorganize the code in preparation of hibernation
  PCI: hv: Add the support of hibernation
  PCI: hv: Change pci_protocol_version to per-hbus
  PCI: hv: kmemleak: Track the page allocations for struct
    hv_pcibus_device

 drivers/pci/controller/pci-hyperv.c | 179 ++++++++++++++++++++++++----
 1 file changed, 153 insertions(+), 26 deletions(-)

-- 
2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation
  2019-11-20  7:16 [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
@ 2019-11-20  7:16 ` Dexuan Cui
  2019-11-24 22:21   ` Michael Kelley
  2019-11-20  7:16 ` [PATCH v2 2/4] PCI: hv: Add the support " Dexuan Cui
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Dexuan Cui @ 2019-11-20  7:16 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

There is no functional change. This is just preparatory to a later
patch which adds the hibernation support for the pci-hyperv driver.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

No change in v2.

 drivers/pci/controller/pci-hyperv.c | 43 +++++++++++++++++++----------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index f1f300218fab..65f18f840ce9 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2379,7 +2379,9 @@ static void hv_pci_onchannelcallback(void *context)
  * failing if the host doesn't support the necessary protocol
  * level.
  */
-static int hv_pci_protocol_negotiation(struct hv_device *hdev)
+static int hv_pci_protocol_negotiation(struct hv_device *hdev,
+				       enum pci_protocol_version_t version[],
+				       int num_version)
 {
 	struct pci_version_request *version_req;
 	struct hv_pci_compl comp_pkt;
@@ -2403,8 +2405,8 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 	version_req = (struct pci_version_request *)&pkt->message;
 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
 
-	for (i = 0; i < ARRAY_SIZE(pci_protocol_versions); i++) {
-		version_req->protocol_version = pci_protocol_versions[i];
+	for (i = 0; i < num_version; i++) {
+		version_req->protocol_version = version[i];
 		ret = vmbus_sendpacket(hdev->channel, version_req,
 				sizeof(struct pci_version_request),
 				(unsigned long)pkt, VM_PKT_DATA_INBAND,
@@ -2420,7 +2422,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
 		}
 
 		if (comp_pkt.completion_status >= 0) {
-			pci_protocol_version = pci_protocol_versions[i];
+			pci_protocol_version = version[i];
 			dev_info(&hdev->device,
 				"PCI VMBus probing: Using version %#x\n",
 				pci_protocol_version);
@@ -2930,7 +2932,8 @@ static int hv_pci_probe(struct hv_device *hdev,
 
 	hv_set_drvdata(hdev, hbus);
 
-	ret = hv_pci_protocol_negotiation(hdev);
+	ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
+					  ARRAY_SIZE(pci_protocol_versions));
 	if (ret)
 		goto close;
 
@@ -3011,7 +3014,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	return ret;
 }
 
-static void hv_pci_bus_exit(struct hv_device *hdev)
+static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating)
 {
 	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
 	struct {
@@ -3027,16 +3030,20 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 	 * access the per-channel ringbuffer any longer.
 	 */
 	if (hdev->channel->rescind)
-		return;
+		return 0;
 
-	/* Delete any children which might still exist. */
-	memset(&relations, 0, sizeof(relations));
-	hv_pci_devices_present(hbus, &relations);
+	if (!hibernating) {
+		/* Delete any children which might still exist. */
+		memset(&relations, 0, sizeof(relations));
+		hv_pci_devices_present(hbus, &relations);
+	}
 
 	ret = hv_send_resources_released(hdev);
-	if (ret)
+	if (ret) {
 		dev_err(&hdev->device,
 			"Couldn't send resources released packet(s)\n");
+		return ret;
+	}
 
 	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
 	init_completion(&comp_pkt.host_event);
@@ -3049,8 +3056,13 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 			       (unsigned long)&pkt.teardown_packet,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
-	if (!ret)
-		wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ);
+	if (ret)
+		return ret;
+
+	if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0)
+		return -ETIMEDOUT;
+
+	return 0;
 }
 
 /**
@@ -3062,6 +3074,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 static int hv_pci_remove(struct hv_device *hdev)
 {
 	struct hv_pcibus_device *hbus;
+	int ret;
 
 	hbus = hv_get_drvdata(hdev);
 	if (hbus->state == hv_pcibus_installed) {
@@ -3074,7 +3087,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 		hbus->state = hv_pcibus_removed;
 	}
 
-	hv_pci_bus_exit(hdev);
+	ret = hv_pci_bus_exit(hdev, false);
 
 	vmbus_close(hdev->channel);
 
@@ -3091,7 +3104,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 	hv_put_dom_num(hbus->sysdata.domain);
 
 	free_page((unsigned long)hbus);
-	return 0;
+	return ret;
 }
 
 static const struct hv_vmbus_device_id hv_pci_id_table[] = {
-- 
2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-20  7:16 [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
  2019-11-20  7:16 ` [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
@ 2019-11-20  7:16 ` " Dexuan Cui
  2019-11-20 17:20   ` Lorenzo Pieralisi
  2019-11-20  7:16 ` [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
  2019-11-20  7:16 ` [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device Dexuan Cui
  3 siblings, 1 reply; 15+ messages in thread
From: Dexuan Cui @ 2019-11-20  7:16 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

Implement the suspend/resume callbacks.

We must make sure there is no pending work items before we call
vmbus_close().

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

Changes in v2: this patch is a simple merge of 2 previous smaller patches,
accordign to the suggestion of Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>.

 drivers/pci/controller/pci-hyperv.c | 107 +++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 65f18f840ce9..e71eb6e0bfdd 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -455,6 +455,7 @@ enum hv_pcibus_state {
 	hv_pcibus_init = 0,
 	hv_pcibus_probed,
 	hv_pcibus_installed,
+	hv_pcibus_removing,
 	hv_pcibus_removed,
 	hv_pcibus_maximum
 };
@@ -1681,6 +1682,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
 
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 
+	/*
+	 * Clear the memory enable bit, in case it's already set. This occurs
+	 * in the suspend path of hibernation, where the device is suspended,
+	 * resumed and suspended again: see hibernation_snapshot() and
+	 * hibernation_platform_enter().
+	 *
+	 * If the memory enable bit is already set, Hyper-V sliently ignores
+	 * the below BAR updates, and the related PCI device driver can not
+	 * work, because reading from the device register(s) always returns
+	 * 0xFFFFFFFF.
+	 */
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
+		command &= ~PCI_COMMAND_MEMORY;
+		_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
+	}
+
 	/* Pick addresses for the BARs. */
 	do {
 		list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2107,6 +2125,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 	unsigned long flags;
 	bool pending_dr;
 
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hbus->hdev->device,
+			 "PCI VMBus BUS_RELATIONS: ignored\n");
+		return;
+	}
+
 	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
 	if (!dr_wrk)
 		return;
@@ -2223,11 +2247,19 @@ static void hv_eject_device_work(struct work_struct *work)
  */
 static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
 {
+	struct hv_pcibus_device *hbus = hpdev->hbus;
+	struct hv_device *hdev = hbus->hdev;
+
+	if (hbus->state == hv_pcibus_removing) {
+		dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
+		return;
+	}
+
 	hpdev->state = hv_pcichild_ejecting;
 	get_pcichild(hpdev);
 	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
-	get_hvpcibus(hpdev->hbus);
-	queue_work(hpdev->hbus->wq, &hpdev->wrk);
+	get_hvpcibus(hbus);
+	queue_work(hbus->wq, &hpdev->wrk);
 }
 
 /**
@@ -3107,6 +3139,75 @@ static int hv_pci_remove(struct hv_device *hdev)
 	return ret;
 }
 
+static int hv_pci_suspend(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum hv_pcibus_state old_state;
+	int ret;
+
+	tasklet_disable(&hdev->channel->callback_event);
+
+	/* Change the hbus state to prevent new work items. */
+	old_state = hbus->state;
+	if (hbus->state == hv_pcibus_installed)
+		hbus->state = hv_pcibus_removing;
+
+	tasklet_enable(&hdev->channel->callback_event);
+
+	if (old_state != hv_pcibus_installed)
+		return -EINVAL;
+
+	flush_workqueue(hbus->wq);
+
+	ret = hv_pci_bus_exit(hdev, true);
+	if (ret)
+		return ret;
+
+	vmbus_close(hdev->channel);
+
+	return 0;
+}
+
+static int hv_pci_resume(struct hv_device *hdev)
+{
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
+	enum pci_protocol_version_t version[1];
+	int ret;
+
+	hbus->state = hv_pcibus_init;
+
+	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
+			 hv_pci_onchannelcallback, hbus);
+	if (ret)
+		return ret;
+
+	/* Only use the version that was in use before hibernation. */
+	version[0] = pci_protocol_version;
+	ret = hv_pci_protocol_negotiation(hdev, version, 1);
+	if (ret)
+		goto out;
+
+	ret = hv_pci_query_relations(hdev);
+	if (ret)
+		goto out;
+
+	ret = hv_pci_enter_d0(hdev);
+	if (ret)
+		goto out;
+
+	ret = hv_send_resources_allocated(hdev);
+	if (ret)
+		goto out;
+
+	prepopulate_bars(hbus);
+
+	hbus->state = hv_pcibus_installed;
+	return 0;
+out:
+	vmbus_close(hdev->channel);
+	return ret;
+}
+
 static const struct hv_vmbus_device_id hv_pci_id_table[] = {
 	/* PCI Pass-through Class ID */
 	/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
@@ -3121,6 +3222,8 @@ static struct hv_driver hv_pci_drv = {
 	.id_table	= hv_pci_id_table,
 	.probe		= hv_pci_probe,
 	.remove		= hv_pci_remove,
+	.suspend	= hv_pci_suspend,
+	.resume		= hv_pci_resume,
 };
 
 static void __exit exit_hv_pci_drv(void)
-- 
2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus
  2019-11-20  7:16 [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
  2019-11-20  7:16 ` [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
  2019-11-20  7:16 ` [PATCH v2 2/4] PCI: hv: Add the support " Dexuan Cui
@ 2019-11-20  7:16 ` Dexuan Cui
  2019-11-24 22:00   ` Michael Kelley
  2019-11-20  7:16 ` [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device Dexuan Cui
  3 siblings, 1 reply; 15+ messages in thread
From: Dexuan Cui @ 2019-11-20  7:16 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

A VM can have multiple Hyper-V hbus. It's incorrect to set the global
variable 'pci_protocol_version' when *every* hbus is initialized in
hv_pci_protocol_negotiation(). This is not an issue in practice since
every hbus should have the same value of hbus->protocol_version, but
we should make the variable per-hbus, so in case we have busses
with different protocol_versions, the driver can still work correctly.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

changes in v2: Improved the changelog by making it clear that this patch
fixes a potential bug if we have busses with different protocol_versions.

 drivers/pci/controller/pci-hyperv.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index e71eb6e0bfdd..d7e05d436b5e 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -76,11 +76,6 @@ static enum pci_protocol_version_t pci_protocol_versions[] = {
 	PCI_PROTOCOL_VERSION_1_1,
 };
 
-/*
- * Protocol version negotiated by hv_pci_protocol_negotiation().
- */
-static enum pci_protocol_version_t pci_protocol_version;
-
 #define PCI_CONFIG_MMIO_LENGTH	0x2000
 #define CFG_PAGE_OFFSET 0x1000
 #define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
@@ -462,6 +457,8 @@ enum hv_pcibus_state {
 
 struct hv_pcibus_device {
 	struct pci_sysdata sysdata;
+	/* Protocol version negotiated with the host */
+	enum pci_protocol_version_t protocol_version;
 	enum hv_pcibus_state state;
 	refcount_t remove_lock;
 	struct hv_device *hdev;
@@ -1225,7 +1222,7 @@ static void hv_irq_unmask(struct irq_data *data)
 	 * negative effect (yet?).
 	 */
 
-	if (pci_protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
+	if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
 		/*
 		 * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
 		 * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
@@ -1395,7 +1392,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
 	ctxt.pci_pkt.compl_ctxt = &comp;
 
-	switch (pci_protocol_version) {
+	switch (hbus->protocol_version) {
 	case PCI_PROTOCOL_VERSION_1_1:
 		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
 					dest,
@@ -2415,6 +2412,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 				       enum pci_protocol_version_t version[],
 				       int num_version)
 {
+	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
 	struct pci_version_request *version_req;
 	struct hv_pci_compl comp_pkt;
 	struct pci_packet *pkt;
@@ -2454,10 +2452,10 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 		}
 
 		if (comp_pkt.completion_status >= 0) {
-			pci_protocol_version = version[i];
+			hbus->protocol_version = version[i];
 			dev_info(&hdev->device,
 				"PCI VMBus probing: Using version %#x\n",
-				pci_protocol_version);
+				hbus->protocol_version);
 			goto exit;
 		}
 
@@ -2741,7 +2739,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 	u32 wslot;
 	int ret;
 
-	size_res = (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2)
+	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
 
 	pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
@@ -2760,7 +2758,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 		pkt->completion_func = hv_pci_generic_compl;
 		pkt->compl_ctxt = &comp_pkt;
 
-		if (pci_protocol_version < PCI_PROTOCOL_VERSION_1_2) {
+		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) {
 			res_assigned =
 				(struct pci_resources_assigned *)&pkt->message;
 			res_assigned->message_type.type =
@@ -3182,7 +3180,7 @@ static int hv_pci_resume(struct hv_device *hdev)
 		return ret;
 
 	/* Only use the version that was in use before hibernation. */
-	version[0] = pci_protocol_version;
+	version[0] = hbus->protocol_version;
 	ret = hv_pci_protocol_negotiation(hdev, version, 1);
 	if (ret)
 		goto out;
-- 
2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device
  2019-11-20  7:16 [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
                   ` (2 preceding siblings ...)
  2019-11-20  7:16 ` [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
@ 2019-11-20  7:16 ` Dexuan Cui
  2019-11-20 17:12   ` Lorenzo Pieralisi
  2019-11-24 21:52   ` Michael Kelley
  3 siblings, 2 replies; 15+ messages in thread
From: Dexuan Cui @ 2019-11-20  7:16 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, lorenzo.pieralisi, bhelgaas,
	linux-hyperv, linux-pci, linux-kernel, mikelley, Alexander.Levin
  Cc: Dexuan Cui

The page allocated for struct hv_pcibus_device contains pointers to other
slab allocations in new_pcichild_device(). Since kmemleak does not track
and scan page allocations, the slab objects will be reported as leaks
(false positives). Use the kmemleak APIs to allow tracking of such pages.

Reported-by: Lili Deng <v-lide@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

This is actually v1. I use "v2" in the Subject just to be consistent with
the other patches in the patchset.

Without the patch, we can see the below warning in dmesg, if kmemleak is
enabled: 

kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak)

and "cat /sys/kernel/debug/kmemleak" shows:

unreferenced object 0xffff9217d1f2bec0 (size 192):
  comm "kworker/u256:7", pid 100821, jiffies 4501481057 (age 61409.997s)
  hex dump (first 32 bytes):
    a8 60 b1 63 17 92 ff ff a8 60 b1 63 17 92 ff ff  .`.c.....`.c....
    02 00 00 00 00 00 00 00 80 92 cd 61 17 92 ff ff  ...........a....
  backtrace:
    [<0000000006f7ae93>] pci_devices_present_work+0x326/0x5e0 [pci_hyperv]
    [<00000000278eb88a>] process_one_work+0x15f/0x360
    [<00000000c59501e6>] worker_thread+0x49/0x3c0
    [<000000000a0a7a94>] kthread+0xf8/0x130
[<000000007075c2e7>] ret_from_fork+0x35/0x40

 drivers/pci/controller/pci-hyperv.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index d7e05d436b5e..cc73f49c9e03 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -46,6 +46,7 @@
 #include <asm/irqdomain.h>
 #include <asm/apic.h>
 #include <linux/irq.h>
+#include <linux/kmemleak.h>
 #include <linux/msi.h>
 #include <linux/hyperv.h>
 #include <linux/refcount.h>
@@ -2907,6 +2908,16 @@ static int hv_pci_probe(struct hv_device *hdev,
 	hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL);
 	if (!hbus)
 		return -ENOMEM;
+
+	/*
+	 * kmemleak doesn't track page allocations as they are not commonly
+	 * used for kernel data structures, but here hbus->children indeed
+	 * contains pointers to hv_pci_dev structs, which are dynamically
+	 * created in new_pcichild_device(). Allow kmemleak to scan the page
+	 * so we don't get a false warning of memory leak.
+	 */
+	kmemleak_alloc(hbus, PAGE_SIZE, 1, GFP_KERNEL);
+
 	hbus->state = hv_pcibus_init;
 
 	/*
@@ -3133,6 +3144,8 @@ static int hv_pci_remove(struct hv_device *hdev)
 
 	hv_put_dom_num(hbus->sysdata.domain);
 
+	/* Remove kmemleak object previously allocated in hv_pci_probe() */
+	kmemleak_free(hbus);
 	free_page((unsigned long)hbus);
 	return ret;
 }
-- 
2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device
  2019-11-20  7:16 ` [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device Dexuan Cui
@ 2019-11-20 17:12   ` Lorenzo Pieralisi
  2019-11-21  0:53     ` Dexuan Cui
  2019-11-24 21:52   ` Michael Kelley
  1 sibling, 1 reply; 15+ messages in thread
From: Lorenzo Pieralisi @ 2019-11-20 17:12 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, sthemmin, sashal, bhelgaas, linux-hyperv,
	linux-pci, linux-kernel, mikelley, Alexander.Levin

On Tue, Nov 19, 2019 at 11:16:58PM -0800, Dexuan Cui wrote:
> The page allocated for struct hv_pcibus_device contains pointers to other
> slab allocations in new_pcichild_device(). Since kmemleak does not track
> and scan page allocations, the slab objects will be reported as leaks
> (false positives). Use the kmemleak APIs to allow tracking of such pages.
> 
> Reported-by: Lili Deng <v-lide@microsoft.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> This is actually v1. I use "v2" in the Subject just to be consistent with
> the other patches in the patchset.

That's a mistake, you should have posted patches separately. I need
hyper-V ACKs on this series to get it through.

Thanks,
Lorenzo

> Without the patch, we can see the below warning in dmesg, if kmemleak is
> enabled: 
> 
> kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
> 
> and "cat /sys/kernel/debug/kmemleak" shows:
> 
> unreferenced object 0xffff9217d1f2bec0 (size 192):
>   comm "kworker/u256:7", pid 100821, jiffies 4501481057 (age 61409.997s)
>   hex dump (first 32 bytes):
>     a8 60 b1 63 17 92 ff ff a8 60 b1 63 17 92 ff ff  .`.c.....`.c....
>     02 00 00 00 00 00 00 00 80 92 cd 61 17 92 ff ff  ...........a....
>   backtrace:
>     [<0000000006f7ae93>] pci_devices_present_work+0x326/0x5e0 [pci_hyperv]
>     [<00000000278eb88a>] process_one_work+0x15f/0x360
>     [<00000000c59501e6>] worker_thread+0x49/0x3c0
>     [<000000000a0a7a94>] kthread+0xf8/0x130
> [<000000007075c2e7>] ret_from_fork+0x35/0x40
> 
>  drivers/pci/controller/pci-hyperv.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index d7e05d436b5e..cc73f49c9e03 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -46,6 +46,7 @@
>  #include <asm/irqdomain.h>
>  #include <asm/apic.h>
>  #include <linux/irq.h>
> +#include <linux/kmemleak.h>
>  #include <linux/msi.h>
>  #include <linux/hyperv.h>
>  #include <linux/refcount.h>
> @@ -2907,6 +2908,16 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL);
>  	if (!hbus)
>  		return -ENOMEM;
> +
> +	/*
> +	 * kmemleak doesn't track page allocations as they are not commonly
> +	 * used for kernel data structures, but here hbus->children indeed
> +	 * contains pointers to hv_pci_dev structs, which are dynamically
> +	 * created in new_pcichild_device(). Allow kmemleak to scan the page
> +	 * so we don't get a false warning of memory leak.
> +	 */
> +	kmemleak_alloc(hbus, PAGE_SIZE, 1, GFP_KERNEL);
> +
>  	hbus->state = hv_pcibus_init;
>  
>  	/*
> @@ -3133,6 +3144,8 @@ static int hv_pci_remove(struct hv_device *hdev)
>  
>  	hv_put_dom_num(hbus->sysdata.domain);
>  
> +	/* Remove kmemleak object previously allocated in hv_pci_probe() */
> +	kmemleak_free(hbus);
>  	free_page((unsigned long)hbus);
>  	return ret;
>  }
> -- 
> 2.19.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-20  7:16 ` [PATCH v2 2/4] PCI: hv: Add the support " Dexuan Cui
@ 2019-11-20 17:20   ` Lorenzo Pieralisi
  2019-11-21  0:50     ` Dexuan Cui
  0 siblings, 1 reply; 15+ messages in thread
From: Lorenzo Pieralisi @ 2019-11-20 17:20 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, sthemmin, sashal, bhelgaas, linux-hyperv,
	linux-pci, linux-kernel, mikelley, Alexander.Levin

On Tue, Nov 19, 2019 at 11:16:56PM -0800, Dexuan Cui wrote:
> Implement the suspend/resume callbacks.
> 
> We must make sure there is no pending work items before we call
> vmbus_close().

Where ? Why ? Imagine a developer reading this log to try to understand
why you made this change, do you really think this commit log is
informative in its current form ?

I am not asking a book but this is a significant feature please make
an effort to explain it (I can update the log for you but please
write one and I shall do it).

Lorenzo

> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> Changes in v2: this patch is a simple merge of 2 previous smaller patches,
> accordign to the suggestion of Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>.
> 
>  drivers/pci/controller/pci-hyperv.c | 107 +++++++++++++++++++++++++++-
>  1 file changed, 105 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 65f18f840ce9..e71eb6e0bfdd 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -455,6 +455,7 @@ enum hv_pcibus_state {
>  	hv_pcibus_init = 0,
>  	hv_pcibus_probed,
>  	hv_pcibus_installed,
> +	hv_pcibus_removing,
>  	hv_pcibus_removed,
>  	hv_pcibus_maximum
>  };
> @@ -1681,6 +1682,23 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
>  
>  	spin_lock_irqsave(&hbus->device_list_lock, flags);
>  
> +	/*
> +	 * Clear the memory enable bit, in case it's already set. This occurs
> +	 * in the suspend path of hibernation, where the device is suspended,
> +	 * resumed and suspended again: see hibernation_snapshot() and
> +	 * hibernation_platform_enter().
> +	 *
> +	 * If the memory enable bit is already set, Hyper-V sliently ignores
> +	 * the below BAR updates, and the related PCI device driver can not
> +	 * work, because reading from the device register(s) always returns
> +	 * 0xFFFFFFFF.
> +	 */
> +	list_for_each_entry(hpdev, &hbus->children, list_entry) {
> +		_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
> +		command &= ~PCI_COMMAND_MEMORY;
> +		_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
> +	}
> +
>  	/* Pick addresses for the BARs. */
>  	do {
>  		list_for_each_entry(hpdev, &hbus->children, list_entry) {
> @@ -2107,6 +2125,12 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
>  	unsigned long flags;
>  	bool pending_dr;
>  
> +	if (hbus->state == hv_pcibus_removing) {
> +		dev_info(&hbus->hdev->device,
> +			 "PCI VMBus BUS_RELATIONS: ignored\n");
> +		return;
> +	}
> +
>  	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
>  	if (!dr_wrk)
>  		return;
> @@ -2223,11 +2247,19 @@ static void hv_eject_device_work(struct work_struct *work)
>   */
>  static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	struct hv_device *hdev = hbus->hdev;
> +
> +	if (hbus->state == hv_pcibus_removing) {
> +		dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
> +		return;
> +	}
> +
>  	hpdev->state = hv_pcichild_ejecting;
>  	get_pcichild(hpdev);
>  	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
> -	get_hvpcibus(hpdev->hbus);
> -	queue_work(hpdev->hbus->wq, &hpdev->wrk);
> +	get_hvpcibus(hbus);
> +	queue_work(hbus->wq, &hpdev->wrk);
>  }
>  
>  /**
> @@ -3107,6 +3139,75 @@ static int hv_pci_remove(struct hv_device *hdev)
>  	return ret;
>  }
>  
> +static int hv_pci_suspend(struct hv_device *hdev)
> +{
> +	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
> +	enum hv_pcibus_state old_state;
> +	int ret;
> +
> +	tasklet_disable(&hdev->channel->callback_event);
> +
> +	/* Change the hbus state to prevent new work items. */
> +	old_state = hbus->state;
> +	if (hbus->state == hv_pcibus_installed)
> +		hbus->state = hv_pcibus_removing;
> +
> +	tasklet_enable(&hdev->channel->callback_event);
> +
> +	if (old_state != hv_pcibus_installed)
> +		return -EINVAL;
> +
> +	flush_workqueue(hbus->wq);
> +
> +	ret = hv_pci_bus_exit(hdev, true);
> +	if (ret)
> +		return ret;
> +
> +	vmbus_close(hdev->channel);
> +
> +	return 0;
> +}
> +
> +static int hv_pci_resume(struct hv_device *hdev)
> +{
> +	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
> +	enum pci_protocol_version_t version[1];
> +	int ret;
> +
> +	hbus->state = hv_pcibus_init;
> +
> +	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
> +			 hv_pci_onchannelcallback, hbus);
> +	if (ret)
> +		return ret;
> +
> +	/* Only use the version that was in use before hibernation. */
> +	version[0] = pci_protocol_version;
> +	ret = hv_pci_protocol_negotiation(hdev, version, 1);
> +	if (ret)
> +		goto out;
> +
> +	ret = hv_pci_query_relations(hdev);
> +	if (ret)
> +		goto out;
> +
> +	ret = hv_pci_enter_d0(hdev);
> +	if (ret)
> +		goto out;
> +
> +	ret = hv_send_resources_allocated(hdev);
> +	if (ret)
> +		goto out;
> +
> +	prepopulate_bars(hbus);
> +
> +	hbus->state = hv_pcibus_installed;
> +	return 0;
> +out:
> +	vmbus_close(hdev->channel);
> +	return ret;
> +}
> +
>  static const struct hv_vmbus_device_id hv_pci_id_table[] = {
>  	/* PCI Pass-through Class ID */
>  	/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
> @@ -3121,6 +3222,8 @@ static struct hv_driver hv_pci_drv = {
>  	.id_table	= hv_pci_id_table,
>  	.probe		= hv_pci_probe,
>  	.remove		= hv_pci_remove,
> +	.suspend	= hv_pci_suspend,
> +	.resume		= hv_pci_resume,
>  };
>  
>  static void __exit exit_hv_pci_drv(void)
> -- 
> 2.19.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-20 17:20   ` Lorenzo Pieralisi
@ 2019-11-21  0:50     ` Dexuan Cui
  2019-11-21 11:44       ` Lorenzo Pieralisi
  0 siblings, 1 reply; 15+ messages in thread
From: Dexuan Cui @ 2019-11-21  0:50 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	bhelgaas, linux-hyperv, linux-pci, linux-kernel, Michael Kelley,
	Sasha Levin

> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Wednesday, November 20, 2019 9:20 AM
> 
> On Tue, Nov 19, 2019 at 11:16:56PM -0800, Dexuan Cui wrote:
> > Implement the suspend/resume callbacks.
> >
> > We must make sure there is no pending work items before we call
> > vmbus_close().
> 
> Where ? Why ? Imagine a developer reading this log to try to understand
> why you made this change, do you really think this commit log is
> informative in its current form ?
> 
> I am not asking a book but this is a significant feature please make
> an effort to explain it (I can update the log for you but please
> write one and I shall do it).
> 
> Lorenzo

Sorry for being sloppy on this patch's changelog! Can you please use the
below? I can also post v3 with the new changelog if that's better.

PCI: hv: Add the support of hibernation

hv_pci_suspend() runs in a process context as a callback in dpm_suspend().
When it starts to run, the channel callback hv_pci_onchannelcallback(),
which runs in a tasklet context, can be still running concurrently and
scheduling new work items onto hbus->wq in hv_pci_devices_present() and
hv_pci_eject_device(), and the work item handlers can access the vmbus
channel, which can be being closed by hv_pci_suspend(), e.g. the work item
handler pci_devices_present_work() -> new_pcichild_device() writes to
the vmbus channel.

To eliminate the race, hv_pci_suspend() disables the channel callback
tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.

This way, when hv_pci_suspend() proceeds, it knows that no new work item
can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
channel.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device
  2019-11-20 17:12   ` Lorenzo Pieralisi
@ 2019-11-21  0:53     ` Dexuan Cui
  0 siblings, 0 replies; 15+ messages in thread
From: Dexuan Cui @ 2019-11-21  0:53 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	bhelgaas, linux-hyperv, linux-pci, linux-kernel, Michael Kelley,
	Sasha Levin

> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Wednesday, November 20, 2019 9:12 AM
> 
> On Tue, Nov 19, 2019 at 11:16:58PM -0800, Dexuan Cui wrote:
> > The page allocated for struct hv_pcibus_device contains pointers to other
> > slab allocations in new_pcichild_device(). Since kmemleak does not track
> > and scan page allocations, the slab objects will be reported as leaks
> > (false positives). Use the kmemleak APIs to allow tracking of such pages.
> >
> > Reported-by: Lili Deng <v-lide@microsoft.com>
> > Signed-off-by: Dexuan Cui <decui@microsoft.com>
> > ---
> >
> > This is actually v1. I use "v2" in the Subject just to be consistent with
> > the other patches in the patchset.
> 
> That's a mistake, you should have posted patches separately. I need

Got it. I'll remember to do it separately in future.

> hyper-V ACKs on this series to get it through.
> 
> Thanks,
> Lorenzo

Sure. I'm asking the Hyper-V maintainers to review the patchset.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-21  0:50     ` Dexuan Cui
@ 2019-11-21 11:44       ` Lorenzo Pieralisi
  2019-11-24 22:19         ` Michael Kelley
  0 siblings, 1 reply; 15+ messages in thread
From: Lorenzo Pieralisi @ 2019-11-21 11:44 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	bhelgaas, linux-hyperv, linux-pci, linux-kernel, Michael Kelley,
	Sasha Levin

On Thu, Nov 21, 2019 at 12:50:17AM +0000, Dexuan Cui wrote:
> > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Sent: Wednesday, November 20, 2019 9:20 AM
> > 
> > On Tue, Nov 19, 2019 at 11:16:56PM -0800, Dexuan Cui wrote:
> > > Implement the suspend/resume callbacks.
> > >
> > > We must make sure there is no pending work items before we call
> > > vmbus_close().
> > 
> > Where ? Why ? Imagine a developer reading this log to try to understand
> > why you made this change, do you really think this commit log is
> > informative in its current form ?
> > 
> > I am not asking a book but this is a significant feature please make
> > an effort to explain it (I can update the log for you but please
> > write one and I shall do it).
> > 
> > Lorenzo
> 
> Sorry for being sloppy on this patch's changelog! Can you please use the
> below? I can also post v3 with the new changelog if that's better.

As you wish but more importantly get hyper-V maintainers to ACK these
changes since time is running out for v5.5.

Lorenzo

> PCI: hv: Add the support of hibernation
> 
> hv_pci_suspend() runs in a process context as a callback in dpm_suspend().
> When it starts to run, the channel callback hv_pci_onchannelcallback(),
> which runs in a tasklet context, can be still running concurrently and
> scheduling new work items onto hbus->wq in hv_pci_devices_present() and
> hv_pci_eject_device(), and the work item handlers can access the vmbus
> channel, which can be being closed by hv_pci_suspend(), e.g. the work item
> handler pci_devices_present_work() -> new_pcichild_device() writes to
> the vmbus channel.
> 
> To eliminate the race, hv_pci_suspend() disables the channel callback
> tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
> 
> This way, when hv_pci_suspend() proceeds, it knows that no new work item
> can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
> channel.
> 
> Thanks,
> -- Dexuan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device
  2019-11-20  7:16 ` [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device Dexuan Cui
  2019-11-20 17:12   ` Lorenzo Pieralisi
@ 2019-11-24 21:52   ` Michael Kelley
  1 sibling, 0 replies; 15+ messages in thread
From: Michael Kelley @ 2019-11-24 21:52 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, lorenzo.pieralisi, bhelgaas, linux-hyperv, linux-pci,
	linux-kernel, Sasha Levin

From: Dexuan Cui <decui@microsoft.com> Sent: Tuesday, November 19, 2019 11:17 PM
> 
> The page allocated for struct hv_pcibus_device contains pointers to other
> slab allocations in new_pcichild_device(). Since kmemleak does not track
> and scan page allocations, the slab objects will be reported as leaks
> (false positives). Use the kmemleak APIs to allow tracking of such pages.
> 
> Reported-by: Lili Deng <v-lide@microsoft.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> This is actually v1. I use "v2" in the Subject just to be consistent with
> the other patches in the patchset.
> 
> Without the patch, we can see the below warning in dmesg, if kmemleak is
> enabled:
> 
> kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
> 
> and "cat /sys/kernel/debug/kmemleak" shows:
> 
> unreferenced object 0xffff9217d1f2bec0 (size 192):
>   comm "kworker/u256:7", pid 100821, jiffies 4501481057 (age 61409.997s)
>   hex dump (first 32 bytes):
>     a8 60 b1 63 17 92 ff ff a8 60 b1 63 17 92 ff ff  .`.c.....`.c....
>     02 00 00 00 00 00 00 00 80 92 cd 61 17 92 ff ff  ...........a....
>   backtrace:
>     [<0000000006f7ae93>] pci_devices_present_work+0x326/0x5e0 [pci_hyperv]
>     [<00000000278eb88a>] process_one_work+0x15f/0x360
>     [<00000000c59501e6>] worker_thread+0x49/0x3c0
>     [<000000000a0a7a94>] kthread+0xf8/0x130
> [<000000007075c2e7>] ret_from_fork+0x35/0x40
> 
>  drivers/pci/controller/pci-hyperv.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index d7e05d436b5e..cc73f49c9e03 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -46,6 +46,7 @@
>  #include <asm/irqdomain.h>
>  #include <asm/apic.h>
>  #include <linux/irq.h>
> +#include <linux/kmemleak.h>
>  #include <linux/msi.h>
>  #include <linux/hyperv.h>
>  #include <linux/refcount.h>
> @@ -2907,6 +2908,16 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus = (struct hv_pcibus_device *)get_zeroed_page(GFP_KERNEL);
>  	if (!hbus)
>  		return -ENOMEM;
> +
> +	/*
> +	 * kmemleak doesn't track page allocations as they are not commonly
> +	 * used for kernel data structures, but here hbus->children indeed
> +	 * contains pointers to hv_pci_dev structs, which are dynamically
> +	 * created in new_pcichild_device(). Allow kmemleak to scan the page
> +	 * so we don't get a false warning of memory leak.
> +	 */
> +	kmemleak_alloc(hbus, PAGE_SIZE, 1, GFP_KERNEL);
> +

As noted in the existing comments, the hbus data structure must not cross
a page boundary, so that a portion of it can be used as a hypercall argument.
Historically kzalloc() didn't provide any alignment guarantee, so
get_zeroed_page() is used.  But a recent commit (59bb47985c1d) changes
the behavior of kmalloc()/kzalloc() so that alignment *is* guaranteed for
power of 2 allocation sizes.  With this change, the better fix is to use
kzalloc() instead of get_zeroed_page().  Note that the allocation size should
be HV_HYP_PAGE_SIZE instead of PAGE_SIZE, as the hbus structure must not
cross a page boundary based on Hyper-V's concept of a page, which may be
different from the guest page size on some architectures.

Michael

>  	hbus->state = hv_pcibus_init;
> 
>  	/*
> @@ -3133,6 +3144,8 @@ static int hv_pci_remove(struct hv_device *hdev)
> 
>  	hv_put_dom_num(hbus->sysdata.domain);
> 
> +	/* Remove kmemleak object previously allocated in hv_pci_probe() */
> +	kmemleak_free(hbus);
>  	free_page((unsigned long)hbus);
>  	return ret;
>  }
> --
> 2.19.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus
  2019-11-20  7:16 ` [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
@ 2019-11-24 22:00   ` Michael Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Michael Kelley @ 2019-11-24 22:00 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, lorenzo.pieralisi, bhelgaas, linux-hyperv, linux-pci,
	linux-kernel, Sasha Levin

From: Dexuan Cui <decui@microsoft.com>  Sent: Tuesday, November 19, 2019 11:17 PM
> 
> A VM can have multiple Hyper-V hbus. It's incorrect to set the global
> variable 'pci_protocol_version' when *every* hbus is initialized in
> hv_pci_protocol_negotiation(). This is not an issue in practice since
> every hbus should have the same value of hbus->protocol_version, but
> we should make the variable per-hbus, so in case we have busses
> with different protocol_versions, the driver can still work correctly.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> changes in v2: Improved the changelog by making it clear that this patch
> fixes a potential bug if we have busses with different protocol_versions.
> 
>  drivers/pci/controller/pci-hyperv.c | 22 ++++++++++------------
>  1 file changed, 10 insertions(+), 12 deletions(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-21 11:44       ` Lorenzo Pieralisi
@ 2019-11-24 22:19         ` Michael Kelley
  2019-11-25 10:44           ` Lorenzo Pieralisi
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Kelley @ 2019-11-24 22:19 UTC (permalink / raw)
  To: Lorenzo Pieralisi, Dexuan Cui
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, sashal,
	bhelgaas, linux-hyperv, linux-pci, linux-kernel, Sasha Levin

From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>  Sent: Thursday, November 21, 2019 3:44 AM
> 
> On Thu, Nov 21, 2019 at 12:50:17AM +0000, Dexuan Cui wrote:
> > > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > > Sent: Wednesday, November 20, 2019 9:20 AM
> > >
> > > On Tue, Nov 19, 2019 at 11:16:56PM -0800, Dexuan Cui wrote:
> > > > Implement the suspend/resume callbacks.
> > > >
> > > > We must make sure there is no pending work items before we call
> > > > vmbus_close().
> > >
> > > Where ? Why ? Imagine a developer reading this log to try to understand
> > > why you made this change, do you really think this commit log is
> > > informative in its current form ?
> > >
> > > I am not asking a book but this is a significant feature please make
> > > an effort to explain it (I can update the log for you but please
> > > write one and I shall do it).
> > >
> > > Lorenzo
> >
> > Sorry for being sloppy on this patch's changelog! Can you please use the
> > below? I can also post v3 with the new changelog if that's better.
> 
> As you wish but more importantly get hyper-V maintainers to ACK these
> changes since time is running out for v5.5.
> 
> Lorenzo
> 
> > PCI: hv: Add the support of hibernation
> >
> > hv_pci_suspend() runs in a process context as a callback in dpm_suspend().
> > When it starts to run, the channel callback hv_pci_onchannelcallback(),
> > which runs in a tasklet context, can be still running concurrently and
> > scheduling new work items onto hbus->wq in hv_pci_devices_present() and
> > hv_pci_eject_device(), and the work item handlers can access the vmbus
> > channel, which can be being closed by hv_pci_suspend(), e.g. the work item
> > handler pci_devices_present_work() -> new_pcichild_device() writes to
> > the vmbus channel.
> >
> > To eliminate the race, hv_pci_suspend() disables the channel callback
> > tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
> >
> > This way, when hv_pci_suspend() proceeds, it knows that no new work item
> > can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
> > channel.
> >
> > Thanks,
> > -- Dexuan

FWIW, I'd like to see the above level of detail also as comments in the code
Itself so that whoever next looks at the code sees the explanation directly
without having to review the commit logs.

Also, the commit message doesn't say what the commit actually does and
why.  I'd suggest the commit message along these lines:

Add suspend() and resume() functions so that Hyper-V virtual PCI devices are
handled properly when the VM hibernates and resumes from hibernation.

Note that the suspend() function must make sure there are no pending work
items before calling vmbus_close(), since it runs in a process context as a
callback in dpm_suspend().  When it starts to run, the channel callback
hv_pci_onchannelcallback(), which runs in a tasklet context, can be still running
concurrently and scheduling new work items onto hbus->wq in
hv_pci_devices_present() and hv_pci_eject_device(), and the work item 
handlers can access the vmbus channel, which can be being closed by
hv_pci_suspend(), e.g. the work item handler pci_devices_present_work() ->
new_pcichild_device() writes to the vmbus channel.

To eliminate the race, hv_pci_suspend() disables the channel callback
tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
This way, when hv_pci_suspend() proceeds, it knows that no new work item
can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
channel.

Michael





^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation
  2019-11-20  7:16 ` [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
@ 2019-11-24 22:21   ` Michael Kelley
  0 siblings, 0 replies; 15+ messages in thread
From: Michael Kelley @ 2019-11-24 22:21 UTC (permalink / raw)
  To: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, lorenzo.pieralisi, bhelgaas, linux-hyperv, linux-pci,
	linux-kernel, Sasha Levin

From: Dexuan Cui <decui@microsoft.com> Sent: Tuesday, November 19, 2019 11:17 PM
> 
> There is no functional change. This is just preparatory to a later
> patch which adds the hibernation support for the pci-hyperv driver.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> No change in v2.
> 
>  drivers/pci/controller/pci-hyperv.c | 43 +++++++++++++++++++----------
>  1 file changed, 28 insertions(+), 15 deletions(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/4] PCI: hv: Add the support of hibernation
  2019-11-24 22:19         ` Michael Kelley
@ 2019-11-25 10:44           ` Lorenzo Pieralisi
  0 siblings, 0 replies; 15+ messages in thread
From: Lorenzo Pieralisi @ 2019-11-25 10:44 UTC (permalink / raw)
  To: Michael Kelley
  Cc: Dexuan Cui, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal, bhelgaas, linux-hyperv, linux-pci, linux-kernel,
	Sasha Levin

On Sun, Nov 24, 2019 at 10:19:46PM +0000, Michael Kelley wrote:
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>  Sent: Thursday, November 21, 2019 3:44 AM
> > 
> > On Thu, Nov 21, 2019 at 12:50:17AM +0000, Dexuan Cui wrote:
> > > > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > > > Sent: Wednesday, November 20, 2019 9:20 AM
> > > >
> > > > On Tue, Nov 19, 2019 at 11:16:56PM -0800, Dexuan Cui wrote:
> > > > > Implement the suspend/resume callbacks.
> > > > >
> > > > > We must make sure there is no pending work items before we call
> > > > > vmbus_close().
> > > >
> > > > Where ? Why ? Imagine a developer reading this log to try to understand
> > > > why you made this change, do you really think this commit log is
> > > > informative in its current form ?
> > > >
> > > > I am not asking a book but this is a significant feature please make
> > > > an effort to explain it (I can update the log for you but please
> > > > write one and I shall do it).
> > > >
> > > > Lorenzo
> > >
> > > Sorry for being sloppy on this patch's changelog! Can you please use the
> > > below? I can also post v3 with the new changelog if that's better.
> > 
> > As you wish but more importantly get hyper-V maintainers to ACK these
> > changes since time is running out for v5.5.
> > 
> > Lorenzo
> > 
> > > PCI: hv: Add the support of hibernation
> > >
> > > hv_pci_suspend() runs in a process context as a callback in dpm_suspend().
> > > When it starts to run, the channel callback hv_pci_onchannelcallback(),
> > > which runs in a tasklet context, can be still running concurrently and
> > > scheduling new work items onto hbus->wq in hv_pci_devices_present() and
> > > hv_pci_eject_device(), and the work item handlers can access the vmbus
> > > channel, which can be being closed by hv_pci_suspend(), e.g. the work item
> > > handler pci_devices_present_work() -> new_pcichild_device() writes to
> > > the vmbus channel.
> > >
> > > To eliminate the race, hv_pci_suspend() disables the channel callback
> > > tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
> > >
> > > This way, when hv_pci_suspend() proceeds, it knows that no new work item
> > > can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
> > > channel.
> > >
> > > Thanks,
> > > -- Dexuan
> 
> FWIW, I'd like to see the above level of detail also as comments in the code
> Itself so that whoever next looks at the code sees the explanation directly
> without having to review the commit logs.
> 
> Also, the commit message doesn't say what the commit actually does and
> why.  I'd suggest the commit message along these lines:
> 
> Add suspend() and resume() functions so that Hyper-V virtual PCI devices are
> handled properly when the VM hibernates and resumes from hibernation.
> 
> Note that the suspend() function must make sure there are no pending work
> items before calling vmbus_close(), since it runs in a process context as a
> callback in dpm_suspend().  When it starts to run, the channel callback
> hv_pci_onchannelcallback(), which runs in a tasklet context, can be still running
> concurrently and scheduling new work items onto hbus->wq in
> hv_pci_devices_present() and hv_pci_eject_device(), and the work item 
> handlers can access the vmbus channel, which can be being closed by
> hv_pci_suspend(), e.g. the work item handler pci_devices_present_work() ->
> new_pcichild_device() writes to the vmbus channel.
> 
> To eliminate the race, hv_pci_suspend() disables the channel callback
> tasklet, sets hbus->state to hv_pcibus_removing, and re-enables the tasklet.
> This way, when hv_pci_suspend() proceeds, it knows that no new work item
> can be scheduled, and then it flushes hbus->wq and safely closes the vmbus
> channel.

This is much better, thank you, if you are happy with the patches
please add your tags so that I can pull the series asap, hopefully
we can merge it in v5.5.

Thanks,
Lorenzo

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, back to index

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-20  7:16 [PATCH v2 0/4] Enhance pci-hyperv to support hibernation, and 2 misc fixes Dexuan Cui
2019-11-20  7:16 ` [PATCH v2 1/4] PCI: hv: Reorganize the code in preparation of hibernation Dexuan Cui
2019-11-24 22:21   ` Michael Kelley
2019-11-20  7:16 ` [PATCH v2 2/4] PCI: hv: Add the support " Dexuan Cui
2019-11-20 17:20   ` Lorenzo Pieralisi
2019-11-21  0:50     ` Dexuan Cui
2019-11-21 11:44       ` Lorenzo Pieralisi
2019-11-24 22:19         ` Michael Kelley
2019-11-25 10:44           ` Lorenzo Pieralisi
2019-11-20  7:16 ` [PATCH v2 3/4] PCI: hv: Change pci_protocol_version to per-hbus Dexuan Cui
2019-11-24 22:00   ` Michael Kelley
2019-11-20  7:16 ` [PATCH v2 4/4] PCI: hv: kmemleak: Track the page allocations for struct hv_pcibus_device Dexuan Cui
2019-11-20 17:12   ` Lorenzo Pieralisi
2019-11-21  0:53     ` Dexuan Cui
2019-11-24 21:52   ` Michael Kelley

Linux-HyperV Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-hyperv/0 linux-hyperv/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-hyperv linux-hyperv/ https://lore.kernel.org/linux-hyperv \
		linux-hyperv@vger.kernel.org
	public-inbox-index linux-hyperv

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-hyperv


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git