linux-hyperv.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message
@ 2019-11-23  1:57 longli
  2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: longli @ 2019-11-23  1:57 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas, linux-hyperv,
	linux-pci, linux-kernel
  Cc: Long Li

From: Long Li <longli@microsoft.com>

hv_dr_state is used to find present PCI devices on the bus. The structure
reuses struct pci_function_description from VSP message to describe a device.

To prepare support for pci_function_description v2, we need to decouple this
dependence in hv_dr_state so it can work with both v1 and v2 VSP messages.

There is no functionality change.

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 100 +++++++++++++++++++---------
 1 file changed, 69 insertions(+), 31 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index f1f300218fab..f2e028cfa7cd 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -507,10 +507,24 @@ struct hv_dr_work {
 	struct hv_pcibus_device *bus;
 };
 
+struct hv_pcidev_description {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+	u32	flags;
+	u16	virtual_numa_node;
+};
+
 struct hv_dr_state {
 	struct list_head list_entry;
 	u32 device_count;
-	struct pci_function_description func[0];
+	struct hv_pcidev_description func[0];
 };
 
 enum hv_pcichild_state {
@@ -527,7 +541,7 @@ struct hv_pci_dev {
 	refcount_t refs;
 	enum hv_pcichild_state state;
 	struct pci_slot *pci_slot;
-	struct pci_function_description desc;
+	struct hv_pcidev_description desc;
 	bool reported_missing;
 	struct hv_pcibus_device *hbus;
 	struct work_struct wrk;
@@ -1862,7 +1876,7 @@ static void q_resource_requirements(void *context, struct pci_response *resp,
  * Return: Pointer to the new tracking struct
  */
 static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
-		struct pci_function_description *desc)
+		struct hv_pcidev_description *desc)
 {
 	struct hv_pci_dev *hpdev;
 	struct pci_child_message *res_req;
@@ -1973,7 +1987,7 @@ static void pci_devices_present_work(struct work_struct *work)
 {
 	u32 child_no;
 	bool found;
-	struct pci_function_description *new_desc;
+	struct hv_pcidev_description *new_desc;
 	struct hv_pci_dev *hpdev;
 	struct hv_pcibus_device *hbus;
 	struct list_head removed;
@@ -2090,43 +2104,26 @@ static void pci_devices_present_work(struct work_struct *work)
 	put_hvpcibus(hbus);
 	kfree(dr);
 }
-
 /**
- * hv_pci_devices_present() - Handles list of new children
+ * hv_pci_start_relations_work() - Queue work to start device discovery
  * @hbus:	Root PCI bus, as understood by this driver
- * @relations:	Packet from host listing children
+ * @dr:		The list of children returned from host
  *
- * This function is invoked whenever a new list of devices for
- * this bus appears.
+ * Return:  0 on success, 1 on failure
  */
-static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
-				   struct pci_bus_relations *relations)
+static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
+				       struct hv_dr_state *dr)
 {
-	struct hv_dr_state *dr;
 	struct hv_dr_work *dr_wrk;
-	unsigned long flags;
 	bool pending_dr;
+	unsigned long flags;
 
 	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
 	if (!dr_wrk)
-		return;
-
-	dr = kzalloc(offsetof(struct hv_dr_state, func) +
-		     (sizeof(struct pci_function_description) *
-		      (relations->device_count)), GFP_NOWAIT);
-	if (!dr)  {
-		kfree(dr_wrk);
-		return;
-	}
+		return 1;
 
 	INIT_WORK(&dr_wrk->wrk, pci_devices_present_work);
 	dr_wrk->bus = hbus;
-	dr->device_count = relations->device_count;
-	if (dr->device_count != 0) {
-		memcpy(dr->func, relations->func,
-		       sizeof(struct pci_function_description) *
-		       dr->device_count);
-	}
 
 	spin_lock_irqsave(&hbus->device_list_lock, flags);
 	/*
@@ -2144,6 +2141,46 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 		get_hvpcibus(hbus);
 		queue_work(hbus->wq, &dr_wrk->wrk);
 	}
+
+	return 0;
+}
+
+/**
+ * hv_pci_devices_present() - Handles list of new children
+ * @hbus:	Root PCI bus, as understood by this driver
+ * @relations:	Packet from host listing children
+ *
+ * This function is invoked whenever a new list of devices for
+ * this bus appears.
+ */
+static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
+				   struct pci_bus_relations *relations)
+{
+	struct hv_dr_state *dr;
+	int i;
+
+	dr = kzalloc(offsetof(struct hv_dr_state, func) +
+		     (sizeof(struct hv_pcidev_description) *
+		      (relations->device_count)), GFP_NOWAIT);
+
+	if (!dr)
+		return;
+
+	dr->device_count = relations->device_count;
+	for (i = 0; i < dr->device_count; i++) {
+		dr->func[i].v_id = relations->func[i].v_id;
+		dr->func[i].d_id = relations->func[i].d_id;
+		dr->func[i].rev = relations->func[i].rev;
+		dr->func[i].prog_intf = relations->func[i].prog_intf;
+		dr->func[i].subclass = relations->func[i].subclass;
+		dr->func[i].base_class = relations->func[i].base_class;
+		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
+		dr->func[i].win_slot = relations->func[i].win_slot;
+		dr->func[i].ser = relations->func[i].ser;
+	}
+
+	if (hv_pci_start_relations_work(hbus, dr))
+		kfree(dr);
 }
 
 /**
@@ -3018,7 +3055,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 		struct pci_packet teardown_packet;
 		u8 buffer[sizeof(struct pci_message)];
 	} pkt;
-	struct pci_bus_relations relations;
+	struct hv_dr_state *dr;
 	struct hv_pci_compl comp_pkt;
 	int ret;
 
@@ -3030,8 +3067,9 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
 		return;
 
 	/* Delete any children which might still exist. */
-	memset(&relations, 0, sizeof(relations));
-	hv_pci_devices_present(hbus, &relations);
+	dr = kzalloc(sizeof(*dr), GFP_ATOMIC);
+	if (dr && hv_pci_start_relations_work(hbus, dr))
+		kfree(dr);
 
 	ret = hv_send_resources_released(hdev);
 	if (ret)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2
  2019-11-23  1:57 [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message longli
@ 2019-11-23  1:57 ` longli
  2019-11-30  4:45   ` [EXTERNAL] " Michael Kelley
  2019-12-02 23:59   ` Dexuan Cui
  2019-11-30  4:30 ` [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message Michael Kelley
  2019-12-02 23:29 ` Dexuan Cui
  2 siblings, 2 replies; 9+ messages in thread
From: longli @ 2019-11-23  1:57 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas, linux-hyperv,
	linux-pci, linux-kernel
  Cc: Long Li

From: Long Li <longli@microsoft.com>

Starting with Hyper-V PCI protocol version 1.3, the host VSP can send
PCI_BUS_RELATIONS2 and pass the vNUMA node information for devices on the bus.
The vNUMA node tells which guest NUMA node this device is on based on guest
VM configuration topology and physical device inforamtion.

The patch adds code to negotiate v1.3 and process PCI_BUS_RELATIONS2.

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 107 ++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index f2e028cfa7cd..488235563c7d 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -63,6 +63,7 @@
 enum pci_protocol_version_t {
 	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/* Win10 */
 	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1 */
+	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* VB */
 };
 
 #define CPU_AFFINITY_ALL	-1ULL
@@ -72,6 +73,7 @@ enum pci_protocol_version_t {
  * first.
  */
 static enum pci_protocol_version_t pci_protocol_versions[] = {
+	PCI_PROTOCOL_VERSION_1_3,
 	PCI_PROTOCOL_VERSION_1_2,
 	PCI_PROTOCOL_VERSION_1_1,
 };
@@ -124,6 +126,7 @@ enum pci_message_type {
 	PCI_RESOURCES_ASSIGNED2		= PCI_MESSAGE_BASE + 0x16,
 	PCI_CREATE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x17,
 	PCI_DELETE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x18, /* unused */
+	PCI_BUS_RELATIONS2		= PCI_MESSAGE_BASE + 0x19,
 	PCI_MESSAGE_MAXIMUM
 };
 
@@ -169,6 +172,26 @@ struct pci_function_description {
 	u32	ser;	/* serial number */
 } __packed;
 
+enum pci_device_description_flags {
+	HV_PCI_DEVICE_FLAG_NONE			= 0x0,
+	HV_PCI_DEVICE_FLAG_NUMA_AFFINITY	= 0x1,
+};
+
+struct pci_function_description2 {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+	u32	flags;
+	u16	virtual_numa_node;
+	u16	reserved;
+} __packed;
+
 /**
  * struct hv_msi_desc
  * @vector:		IDT entry
@@ -304,6 +327,12 @@ struct pci_bus_relations {
 	struct pci_function_description func[0];
 } __packed;
 
+struct pci_bus_relations2 {
+	struct pci_incoming_message incoming;
+	u32 device_count;
+	struct pci_function_description2 func[0];
+} __packed;
+
 struct pci_q_res_req_response {
 	struct vmpacket_descriptor hdr;
 	s32 status;			/* negative values are failures */
@@ -1417,6 +1446,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		break;
 
 	case PCI_PROTOCOL_VERSION_1_2:
+	case PCI_PROTOCOL_VERSION_1_3:
 		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
 					dest,
 					hpdev->desc.win_slot.slot,
@@ -1798,6 +1828,25 @@ static void hv_pci_remove_slots(struct hv_pcibus_device *hbus)
 	}
 }
 
+/*
+ * Set NUMA node for the devices on the bus
+ */
+static void pci_assign_numa_node(struct hv_pcibus_device *hbus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *bus = hbus->pci_bus;
+	struct hv_pci_dev *hv_dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev->devfn));
+		if (!hv_dev)
+			continue;
+
+		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
+			set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node);
+	}
+}
+
 /**
  * create_root_hv_pci_bus() - Expose a new root PCI bus
  * @hbus:	Root PCI bus, as understood by this driver
@@ -1820,6 +1869,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
 
 	pci_lock_rescan_remove();
 	pci_scan_child_bus(hbus->pci_bus);
+	pci_assign_numa_node(hbus);
 	pci_bus_assign_resources(hbus->pci_bus);
 	hv_pci_assign_slots(hbus);
 	pci_bus_add_devices(hbus->pci_bus);
@@ -2088,6 +2138,7 @@ static void pci_devices_present_work(struct work_struct *work)
 		 */
 		pci_lock_rescan_remove();
 		pci_scan_child_bus(hbus->pci_bus);
+		pci_assign_numa_node(hbus);
 		hv_pci_assign_slots(hbus);
 		pci_unlock_rescan_remove();
 		break;
@@ -2183,6 +2234,46 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
 		kfree(dr);
 }
 
+/**
+ * hv_pci_devices_present2() - Handles list of new children
+ * @hbus:	Root PCI bus, as understood by this driver
+ * @relations2:	Packet from host listing children
+ *
+ * This function is the v2 version of hv_pci_devices_present()
+ */
+static void hv_pci_devices_present2(struct hv_pcibus_device *hbus,
+				    struct pci_bus_relations2 *relations)
+{
+	struct hv_dr_state *dr;
+	int i;
+
+	dr = kzalloc(offsetof(struct hv_dr_state, func) +
+		     (sizeof(struct hv_pcidev_description) *
+		      (relations->device_count)), GFP_NOWAIT);
+
+	if (!dr)
+		return;
+
+	dr->device_count = relations->device_count;
+	for (i = 0; i < dr->device_count; i++) {
+		dr->func[i].v_id = relations->func[i].v_id;
+		dr->func[i].d_id = relations->func[i].d_id;
+		dr->func[i].rev = relations->func[i].rev;
+		dr->func[i].prog_intf = relations->func[i].prog_intf;
+		dr->func[i].subclass = relations->func[i].subclass;
+		dr->func[i].base_class = relations->func[i].base_class;
+		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
+		dr->func[i].win_slot = relations->func[i].win_slot;
+		dr->func[i].ser = relations->func[i].ser;
+		dr->func[i].flags = relations->func[i].flags;
+		dr->func[i].virtual_numa_node =
+			relations->func[i].virtual_numa_node;
+	}
+
+	if (hv_pci_start_relations_work(hbus, dr))
+		kfree(dr);
+}
+
 /**
  * hv_eject_device_work() - Asynchronously handles ejection
  * @work:	Work struct embedded in internal device struct
@@ -2288,6 +2379,7 @@ static void hv_pci_onchannelcallback(void *context)
 	struct pci_response *response;
 	struct pci_incoming_message *new_message;
 	struct pci_bus_relations *bus_rel;
+	struct pci_bus_relations2 *bus_rel2;
 	struct pci_dev_inval_block *inval;
 	struct pci_dev_incoming *dev_message;
 	struct hv_pci_dev *hpdev;
@@ -2355,6 +2447,21 @@ static void hv_pci_onchannelcallback(void *context)
 				hv_pci_devices_present(hbus, bus_rel);
 				break;
 
+			case PCI_BUS_RELATIONS2:
+
+				bus_rel2 = (struct pci_bus_relations2 *)buffer;
+				if (bytes_recvd <
+				    offsetof(struct pci_bus_relations2, func) +
+				    (sizeof(struct pci_function_description2) *
+				     (bus_rel2->device_count))) {
+					dev_err(&hbus->hdev->device,
+						"bus relations v2 too small\n");
+					break;
+				}
+
+				hv_pci_devices_present2(hbus, bus_rel2);
+				break;
+
 			case PCI_EJECT:
 
 				dev_message = (struct pci_dev_incoming *)buffer;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message
  2019-11-23  1:57 [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message longli
  2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
@ 2019-11-30  4:30 ` Michael Kelley
  2019-12-02 23:29 ` Dexuan Cui
  2 siblings, 0 replies; 9+ messages in thread
From: Michael Kelley @ 2019-11-30  4:30 UTC (permalink / raw)
  To: longli, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Sasha Levin, Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas,
	linux-hyperv, linux-pci, linux-kernel
  Cc: Long Li

From: longli@linuxonhyperv.com  Sent: Friday, November 22, 2019 5:57 PM
> 
> From: Long Li <longli@microsoft.com>
> 
> hv_dr_state is used to find present PCI devices on the bus. The structure
> reuses struct pci_function_description from VSP message to describe a device.
> 
> To prepare support for pci_function_description v2, we need to decouple this
> dependence in hv_dr_state so it can work with both v1 and v2 VSP messages.
> 
> There is no functionality change.
> 
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 100 +++++++++++++++++++---------
>  1 file changed, 69 insertions(+), 31 deletions(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2
  2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
@ 2019-11-30  4:45   ` Michael Kelley
  2019-12-02 21:23     ` Long Li
  2019-12-02 23:59   ` Dexuan Cui
  1 sibling, 1 reply; 9+ messages in thread
From: Michael Kelley @ 2019-11-30  4:45 UTC (permalink / raw)
  To: longli, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Sasha Levin, Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas,
	linux-hyperv, linux-pci, linux-kernel, Dexuan Cui
  Cc: Long Li

From: longli@linuxonhyperv.com Sent: Friday, November 22, 2019 5:57 PM
> 
> From: Long Li <longli@microsoft.com>
> 
> Starting with Hyper-V PCI protocol version 1.3, the host VSP can send
> PCI_BUS_RELATIONS2 and pass the vNUMA node information for devices on the bus.
> The vNUMA node tells which guest NUMA node this device is on based on guest
> VM configuration topology and physical device inforamtion.
> 
> The patch adds code to negotiate v1.3 and process PCI_BUS_RELATIONS2.
> 
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>  drivers/pci/controller/pci-hyperv.c | 107 ++++++++++++++++++++++++++++
>  1 file changed, 107 insertions(+)
> 

[snip]

> +/*
> + * Set NUMA node for the devices on the bus
> + */
> +static void pci_assign_numa_node(struct hv_pcibus_device *hbus)
> +{
> +	struct pci_dev *dev;
> +	struct pci_bus *bus = hbus->pci_bus;
> +	struct hv_pci_dev *hv_dev;
> +
> +	list_for_each_entry(dev, &bus->devices, bus_list) {
> +		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev->devfn));
> +		if (!hv_dev)
> +			continue;
> +
> +		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
> +			set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node);
> +	}
> +}
> +

get_pcichild_wslot() gets a reference to the hv_dev, so a call to put_pcichild() is
needed to balance.

But more broadly, is the call to set_dev_node() operating on the correct
struct device?  There's a struct device in the struct hv_device, and also one in the
struct pci_dev.  Everything in this module seems to be operating on the former.
For example, all the dev_err() calls identify the struct device in struct hv_device.
And enumerating all the devices on a virtual PCI bus is done by iterating through
the hbus->children list, not the bus->devices list.  I don't completely understand
the interplay between the two struct device entries, but the difference makes
me wonder if the above code should be setting the NUMA node on the struct
device that's in struct hv_device.

Michael

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2
  2019-11-30  4:45   ` [EXTERNAL] " Michael Kelley
@ 2019-12-02 21:23     ` Long Li
  0 siblings, 0 replies; 9+ messages in thread
From: Long Li @ 2019-12-02 21:23 UTC (permalink / raw)
  To: Michael Kelley, longli, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Sasha Levin, Lorenzo Pieralisi, Andrew Murray,
	Bjorn Helgaas, linux-hyperv, linux-pci, linux-kernel, Dexuan Cui

>Subject: RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and
>support PCI_BUS_RELATIONS2
>
>From: longli@linuxonhyperv.com Sent: Friday, November 22, 2019 5:57 PM
>>
>> From: Long Li <longli@microsoft.com>
>>
>> Starting with Hyper-V PCI protocol version 1.3, the host VSP can send
>> PCI_BUS_RELATIONS2 and pass the vNUMA node information for devices
>on the bus.
>> The vNUMA node tells which guest NUMA node this device is on based on
>> guest VM configuration topology and physical device inforamtion.
>>
>> The patch adds code to negotiate v1.3 and process PCI_BUS_RELATIONS2.
>>
>> Signed-off-by: Long Li <longli@microsoft.com>
>> ---
>>  drivers/pci/controller/pci-hyperv.c | 107
>> ++++++++++++++++++++++++++++
>>  1 file changed, 107 insertions(+)
>>
>
>[snip]
>
>> +/*
>> + * Set NUMA node for the devices on the bus  */ static void
>> +pci_assign_numa_node(struct hv_pcibus_device *hbus) {
>> +	struct pci_dev *dev;
>> +	struct pci_bus *bus = hbus->pci_bus;
>> +	struct hv_pci_dev *hv_dev;
>> +
>> +	list_for_each_entry(dev, &bus->devices, bus_list) {
>> +		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev-
>>devfn));
>> +		if (!hv_dev)
>> +			continue;
>> +
>> +		if (hv_dev->desc.flags &
>HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
>> +			set_dev_node(&dev->dev, hv_dev-
>>desc.virtual_numa_node);
>> +	}
>> +}
>> +
>
>get_pcichild_wslot() gets a reference to the hv_dev, so a call to put_pcichild()
>is needed to balance.

Thanks for pointing this out! I will send v2 to fix this.

>
>But more broadly, is the call to set_dev_node() operating on the correct struct
>device?  There's a struct device in the struct hv_device, and also one in the
>struct pci_dev.  Everything in this module seems to be operating on the
>former.
>For example, all the dev_err() calls identify the struct device in struct
>hv_device.
>And enumerating all the devices on a virtual PCI bus is done by iterating
>through the hbus->children list, not the bus->devices list.  I don't completely
>understand the interplay between the two struct device entries, but the
>difference makes me wonder if the above code should be setting the NUMA
>node on the struct device that's in struct hv_device.

There are two "bus" variables in this function. "bus" is a "struct pci_bus" from the PCI layer. "hbus" is a "struct hv_pcibus_device" defined in pci-hyperv.

The parameter passed to set_dev_node is &dev->dev, the dev is enumerated from bus->devices. So dev (struct pci_dev) is from the PCI layer, this function sets the node on the device that will be used to probe and load its corresponding driver.

There is a separate list of hbus->children. It's represents pci-hyperv's view of devices on its bus. pci-hyperv presents those devices to PCI layer when pci_scan_child_bus() is called.

Long

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message
  2019-11-23  1:57 [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message longli
  2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
  2019-11-30  4:30 ` [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message Michael Kelley
@ 2019-12-02 23:29 ` Dexuan Cui
  2019-12-03  0:39   ` Long Li
  2 siblings, 1 reply; 9+ messages in thread
From: Dexuan Cui @ 2019-12-02 23:29 UTC (permalink / raw)
  To: longli, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Sasha Levin, Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas,
	linux-hyperv, linux-pci, linux-kernel
  Cc: Long Li

> From: linux-hyperv-owner@vger.kernel.org
> Sent: Friday, November 22, 2019 5:57 PM
> ... 
> +struct hv_pcidev_description {
> +	u16	v_id;	/* vendor ID */
> +	u16	d_id;	/* device ID */
> +	u8	rev;
> +	u8	prog_intf;
> +	u8	subclass;
> +	u8	base_class;
> +	u32	subsystem_id;
> +	union win_slot_encoding win_slot;

Change the spact to a TAB? :-)

>  /**
> - * hv_pci_devices_present() - Handles list of new children
> + * hv_pci_start_relations_work() - Queue work to start device discovery
>   * @hbus:	Root PCI bus, as understood by this driver
> - * @relations:	Packet from host listing children
> + * @dr:		The list of children returned from host
>   *
> - * This function is invoked whenever a new list of devices for
> - * this bus appears.
> + * Return:  0 on success, 1 on failure
>   */

Usually we return a negative value upon error, if possible.

> -static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
> -				   struct pci_bus_relations *relations)
> +static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
> +				       struct hv_dr_state *dr)
>  {
> -	struct hv_dr_state *dr;
>  	struct hv_dr_work *dr_wrk;
> -	unsigned long flags;
>  	bool pending_dr;
> +	unsigned long flags;
> 
>  	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
>  	if (!dr_wrk)
> -		return;
> -
> -	dr = kzalloc(offsetof(struct hv_dr_state, func) +
> -		     (sizeof(struct pci_function_description) *
> -		      (relations->device_count)), GFP_NOWAIT);
> -	if (!dr)  {
> -		kfree(dr_wrk);
> -		return;
> -	}
> +		return 1;

How about "return -ENOMEM;" ?
 
> @@ -3018,7 +3055,7 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
>  		struct pci_packet teardown_packet;
>  		u8 buffer[sizeof(struct pci_message)];
>  	} pkt;
> -	struct pci_bus_relations relations;
> +	struct hv_dr_state *dr;
>  	struct hv_pci_compl comp_pkt;
>  	int ret;
> 
> @@ -3030,8 +3067,9 @@ static void hv_pci_bus_exit(struct hv_device *hdev)
>  		return;
> 
>  	/* Delete any children which might still exist. */
> -	memset(&relations, 0, sizeof(relations));
> -	hv_pci_devices_present(hbus, &relations);
> +	dr = kzalloc(sizeof(*dr), GFP_ATOMIC);

Here we are in a process context, so GFP_KERNEL is preferred.

> +	if (dr && hv_pci_start_relations_work(hbus, dr))
> +		kfree(dr);

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2
  2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
  2019-11-30  4:45   ` [EXTERNAL] " Michael Kelley
@ 2019-12-02 23:59   ` Dexuan Cui
  2019-12-03  0:49     ` Long Li
  1 sibling, 1 reply; 9+ messages in thread
From: Dexuan Cui @ 2019-12-02 23:59 UTC (permalink / raw)
  To: longli, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Sasha Levin, Lorenzo Pieralisi, Andrew Murray, Bjorn Helgaas,
	linux-hyperv, linux-pci, linux-kernel
  Cc: Long Li

> From: linux-hyperv-owner@vger.kernel.org
> Sent: Friday, November 22, 2019 5:57 PM
>  ...
> @@ -63,6 +63,7 @@
>  enum pci_protocol_version_t {
>  	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/* Win10
> */
>  	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1 */
> +	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* VB */
>  };

What is "VB" ? Can we use a more meaningful name here? :-)

> +struct pci_function_description2 {
> +	u16	v_id;	/* vendor ID */
> +	u16	d_id;	/* device ID */
> +	u8	rev;
> +	u8	prog_intf;
> +	u8	subclass;
> +	u8	base_class;
> +	u32	subsystem_id;
> +	union win_slot_encoding win_slot;

space -> TAB?

> +/*
> + * Set NUMA node for the devices on the bus
> + */
> +static void pci_assign_numa_node(struct hv_pcibus_device *hbus)

IMO we'd better add a "hv_" prefix to this function's name, otherwise it looks
more like a generic PCI subsystem API.

> +{
> +	struct pci_dev *dev;
> +	struct pci_bus *bus = hbus->pci_bus;
> +	struct hv_pci_dev *hv_dev;
> +
> +	list_for_each_entry(dev, &bus->devices, bus_list) {
> +		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev->devfn));
> +		if (!hv_dev)
> +			continue;
> +
> +		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
> +			set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node);
> +	}
> +}

Can you please give a brief background introduction to dev->numa_node,
e.g. how is it used here? -- is it used when a PCI device driver (e.g. the mlx
driver) asks the kernel to allocate memory for DMA, or allocate MSI interrupts?
How big is the performance gain in the tests? I'm curious as it looks unclear
to me how dev->numa_node is used by the PCI subsystem.

Thanks,
-- Dexuan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message
  2019-12-02 23:29 ` Dexuan Cui
@ 2019-12-03  0:39   ` Long Li
  0 siblings, 0 replies; 9+ messages in thread
From: Long Li @ 2019-12-03  0:39 UTC (permalink / raw)
  To: Dexuan Cui, longli, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Sasha Levin, Lorenzo Pieralisi, Andrew Murray,
	Bjorn Helgaas, linux-hyperv, linux-pci, linux-kernel

>Subject: RE: [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in
>hv_dr_state from VSP message
>
>> From: linux-hyperv-owner@vger.kernel.org
>> Sent: Friday, November 22, 2019 5:57 PM ...
>> +struct hv_pcidev_description {
>> +	u16	v_id;	/* vendor ID */
>> +	u16	d_id;	/* device ID */
>> +	u8	rev;
>> +	u8	prog_intf;
>> +	u8	subclass;
>> +	u8	base_class;
>> +	u32	subsystem_id;
>> +	union win_slot_encoding win_slot;
>
>Change the spact to a TAB? :-)
>
>>  /**
>> - * hv_pci_devices_present() - Handles list of new children
>> + * hv_pci_start_relations_work() - Queue work to start device
>> + discovery
>>   * @hbus:	Root PCI bus, as understood by this driver
>> - * @relations:	Packet from host listing children
>> + * @dr:		The list of children returned from host
>>   *
>> - * This function is invoked whenever a new list of devices for
>> - * this bus appears.
>> + * Return:  0 on success, 1 on failure
>>   */
>
>Usually we return a negative value upon error, if possible.
>
>> -static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
>> -				   struct pci_bus_relations *relations)
>> +static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
>> +				       struct hv_dr_state *dr)
>>  {
>> -	struct hv_dr_state *dr;
>>  	struct hv_dr_work *dr_wrk;
>> -	unsigned long flags;
>>  	bool pending_dr;
>> +	unsigned long flags;
>>
>>  	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
>>  	if (!dr_wrk)
>> -		return;
>> -
>> -	dr = kzalloc(offsetof(struct hv_dr_state, func) +
>> -		     (sizeof(struct pci_function_description) *
>> -		      (relations->device_count)), GFP_NOWAIT);
>> -	if (!dr)  {
>> -		kfree(dr_wrk);
>> -		return;
>> -	}
>> +		return 1;
>
>How about "return -ENOMEM;" ?
>
>> @@ -3018,7 +3055,7 @@ static void hv_pci_bus_exit(struct hv_device
>*hdev)
>>  		struct pci_packet teardown_packet;
>>  		u8 buffer[sizeof(struct pci_message)];
>>  	} pkt;
>> -	struct pci_bus_relations relations;
>> +	struct hv_dr_state *dr;
>>  	struct hv_pci_compl comp_pkt;
>>  	int ret;
>>
>> @@ -3030,8 +3067,9 @@ static void hv_pci_bus_exit(struct hv_device
>*hdev)
>>  		return;
>>
>>  	/* Delete any children which might still exist. */
>> -	memset(&relations, 0, sizeof(relations));
>> -	hv_pci_devices_present(hbus, &relations);
>> +	dr = kzalloc(sizeof(*dr), GFP_ATOMIC);
>
>Here we are in a process context, so GFP_KERNEL is preferred.
>
>> +	if (dr && hv_pci_start_relations_work(hbus, dr))
>> +		kfree(dr);
>
>Thanks,
>-- Dexuan

Thanks! I will send v2 to address those comments.

Long

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2
  2019-12-02 23:59   ` Dexuan Cui
@ 2019-12-03  0:49     ` Long Li
  0 siblings, 0 replies; 9+ messages in thread
From: Long Li @ 2019-12-03  0:49 UTC (permalink / raw)
  To: Dexuan Cui, longli, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, Sasha Levin, Lorenzo Pieralisi, Andrew Murray,
	Bjorn Helgaas, linux-hyperv, linux-pci, linux-kernel

>Subject: RE: [EXTERNAL] [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and
>support PCI_BUS_RELATIONS2
>
>> From: linux-hyperv-owner@vger.kernel.org
>> Sent: Friday, November 22, 2019 5:57 PM  ...
>> @@ -63,6 +63,7 @@
>>  enum pci_protocol_version_t {
>>  	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/*
>Win10
>> */
>>  	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1
>*/
>> +	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* VB
>*/
>>  };
>
>What is "VB" ? Can we use a more meaningful name here? :-)

Vibranium.

>
>> +struct pci_function_description2 {
>> +	u16	v_id;	/* vendor ID */
>> +	u16	d_id;	/* device ID */
>> +	u8	rev;
>> +	u8	prog_intf;
>> +	u8	subclass;
>> +	u8	base_class;
>> +	u32	subsystem_id;
>> +	union win_slot_encoding win_slot;
>
>space -> TAB?
>
>> +/*
>> + * Set NUMA node for the devices on the bus  */ static void
>> +pci_assign_numa_node(struct hv_pcibus_device *hbus)
>
>IMO we'd better add a "hv_" prefix to this function's name, otherwise it looks
>more like a generic PCI subsystem API.

I will send v2 to address comments above.

>
>> +{
>> +	struct pci_dev *dev;
>> +	struct pci_bus *bus = hbus->pci_bus;
>> +	struct hv_pci_dev *hv_dev;
>> +
>> +	list_for_each_entry(dev, &bus->devices, bus_list) {
>> +		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev-
>>devfn));
>> +		if (!hv_dev)
>> +			continue;
>> +
>> +		if (hv_dev->desc.flags &
>HV_PCI_DEVICE_FLAG_NUMA_AFFINITY)
>> +			set_dev_node(&dev->dev, hv_dev-
>>desc.virtual_numa_node);
>> +	}
>> +}
>
>Can you please give a brief background introduction to dev->numa_node, e.g.
>how is it used here? -- is it used when a PCI device driver (e.g. the mlx
>driver) asks the kernel to allocate memory for DMA, or allocate MSI interrupts?
>How big is the performance gain in the tests? I'm curious as it looks unclear to
>me how dev->numa_node is used by the PCI subsystem.

numa_node can be used by a device driver (if it's numa aware) to setup its internal data structures and allocate its MSI.

As an example, you can look at "drivers/net/ethernet/mellanox/mlx4/main.c: mlx4_load_one()":
It stores the value at : dev->numa_node = dev_to_node(&pdev->dev);
after that, dev->numa_node  is used through driver.

numa_node is also exported though /sys to user-mode, so a NUMA aware application (e.g. MPI) can figure out how to pin itself to selected CPUs for the best latency.

>
>Thanks,
>-- Dexuan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-12-03  0:50 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-23  1:57 [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message longli
2019-11-23  1:57 ` [PATCH 2/2] PCI: hv: Add support for protocol 1.3 and support PCI_BUS_RELATIONS2 longli
2019-11-30  4:45   ` [EXTERNAL] " Michael Kelley
2019-12-02 21:23     ` Long Li
2019-12-02 23:59   ` Dexuan Cui
2019-12-03  0:49     ` Long Li
2019-11-30  4:30 ` [EXTERNAL] [PATCH 1/2] PCI: hv: decouple the func definition in hv_dr_state from VSP message Michael Kelley
2019-12-02 23:29 ` Dexuan Cui
2019-12-03  0:39   ` Long Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).