linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Drivers: hv: Implement VF association based on serial number
@ 2016-12-08  8:33 kys
  2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
  0 siblings, 1 reply; 29+ messages in thread
From: kys @ 2016-12-08  8:33 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas
  Cc: K. Y. Srinivasan

From: K. Y. Srinivasan <kys@microsoft.com>

Implement VF association based on serial number published by
the host.

Greg, as promised here is the patchset that would use the API for
detecting if the device is a vmbus device.
If you can take the first two patches, we can submit the netvsc
patch to the net-next tree.

Haiyang Zhang (3):
  hyperv: Move hv_pci_dev and related structs to hyperv.h
  hyperv: Add a function to detect if the device is a vmbus dev
  hv_netvsc: Implement VF matching based on serial numbers

 drivers/hv/vmbus_drv.c          |    6 ++
 drivers/net/hyperv/netvsc_drv.c |   55 ++++++++++++++++++++--
 drivers/pci/host/pci-hyperv.c   |   91 -----------------------------------
 include/linux/hyperv.h          |  100 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 156 insertions(+), 96 deletions(-)

-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08  8:33 [PATCH 0/3] Drivers: hv: Implement VF association based on serial number kys
@ 2016-12-08  8:33 ` kys
  2016-12-08  8:33   ` [PATCH 2/3] hyperv: Add a function to detect if the device is a vmbus dev kys
                     ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: kys @ 2016-12-08  8:33 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas
  Cc: Haiyang Zhang, K. Y. Srinivasan

From: Haiyang Zhang <haiyangz@microsoft.com>

Move some vPCI data structures to hyperv.h,
because we share them with other module.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/pci/host/pci-hyperv.c |   91 --------------------------------------
 include/linux/hyperv.h        |   98 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 92 deletions(-)

diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
index a63c3a4..7a97b4f 100644
--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -49,12 +49,10 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <linux/irqdomain.h>
 #include <asm/irqdomain.h>
 #include <asm/apic.h>
-#include <linux/msi.h>
 #include <linux/hyperv.h>
 #include <asm/mshyperv.h>
 
@@ -122,35 +120,6 @@ enum pci_message_type {
 	u32 version;
 } __packed;
 
-/*
- * Function numbers are 8-bits wide on Express, as interpreted through ARI,
- * which is all this driver does.  This representation is the one used in
- * Windows, which is what is expected when sending this back and forth with
- * the Hyper-V parent partition.
- */
-union win_slot_encoding {
-	struct {
-		u32	func:8;
-		u32	reserved:24;
-	} bits;
-	u32 slot;
-} __packed;
-
-/*
- * Pretty much as defined in the PCI Specifications.
- */
-struct pci_function_description {
-	u16	v_id;	/* vendor ID */
-	u16	d_id;	/* device ID */
-	u8	rev;
-	u8	prog_intf;
-	u8	subclass;
-	u8	base_class;
-	u32	subsystem_id;
-	union win_slot_encoding win_slot;
-	u32	ser;	/* serial number */
-} __packed;
-
 /**
  * struct hv_msi_desc
  * @vector:		IDT entry
@@ -345,41 +314,6 @@ struct retarget_msi_interrupt {
  * Driver specific state.
  */
 
-enum hv_pcibus_state {
-	hv_pcibus_init = 0,
-	hv_pcibus_probed,
-	hv_pcibus_installed,
-	hv_pcibus_maximum
-};
-
-struct hv_pcibus_device {
-	struct pci_sysdata sysdata;
-	enum hv_pcibus_state state;
-	atomic_t remove_lock;
-	struct hv_device *hdev;
-	resource_size_t low_mmio_space;
-	resource_size_t high_mmio_space;
-	struct resource *mem_config;
-	struct resource *low_mmio_res;
-	struct resource *high_mmio_res;
-	struct completion *survey_event;
-	struct completion remove_event;
-	struct pci_bus *pci_bus;
-	spinlock_t config_lock;	/* Avoid two threads writing index page */
-	spinlock_t device_list_lock;	/* Protect lists below */
-	void __iomem *cfg_addr;
-
-	struct semaphore enum_sem;
-	struct list_head resources_for_children;
-
-	struct list_head children;
-	struct list_head dr_list;
-
-	struct msi_domain_info msi_info;
-	struct msi_controller msi_chip;
-	struct irq_domain *irq_domain;
-};
-
 /*
  * Tracks "Device Relations" messages from the host, which must be both
  * processed in order and deferred so that they don't run in the context
@@ -396,14 +330,6 @@ struct hv_dr_state {
 	struct pci_function_description func[0];
 };
 
-enum hv_pcichild_state {
-	hv_pcichild_init = 0,
-	hv_pcichild_requirements,
-	hv_pcichild_resourced,
-	hv_pcichild_ejecting,
-	hv_pcichild_maximum
-};
-
 enum hv_pcidev_ref_reason {
 	hv_pcidev_ref_invalid = 0,
 	hv_pcidev_ref_initial,
@@ -415,23 +341,6 @@ enum hv_pcidev_ref_reason {
 	hv_pcidev_ref_max
 };
 
-struct hv_pci_dev {
-	/* List protected by pci_rescan_remove_lock */
-	struct list_head list_entry;
-	atomic_t refs;
-	enum hv_pcichild_state state;
-	struct pci_function_description desc;
-	bool reported_missing;
-	struct hv_pcibus_device *hbus;
-	struct work_struct wrk;
-
-	/*
-	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
-	 * read it back, for each of the BAR offsets within config space.
-	 */
-	u32 probed_bar[6];
-};
-
 struct hv_pci_compl {
 	struct completion host_event;
 	s32 completion_status;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42fe43f..ff6cd3e 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -36,7 +36,8 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
-
+#include <linux/pci.h>
+#include <linux/msi.h>
 
 #define MAX_PAGE_BUFFER_COUNT				32
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
@@ -1590,5 +1591,100 @@ static inline void commit_rd_index(struct vmbus_channel *channel)
 	hv_signal_on_read(channel);
 }
 
+/* vPCI structures */
+
+/*
+ * Function numbers are 8-bits wide on Express, as interpreted through ARI,
+ * which is all this driver does.  This representation is the one used in
+ * Windows, which is what is expected when sending this back and forth with
+ * the Hyper-V parent partition.
+ */
+union win_slot_encoding {
+	struct {
+		u32	func:8;
+		u32	reserved:24;
+	} bits;
+	u32 slot;
+} __packed;
+
+/*
+ * Pretty much as defined in the PCI Specifications.
+ */
+struct pci_function_description {
+	u16	v_id;	/* vendor ID */
+	u16	d_id;	/* device ID */
+	u8	rev;
+	u8	prog_intf;
+	u8	subclass;
+	u8	base_class;
+	u32	subsystem_id;
+	union win_slot_encoding win_slot;
+	u32	ser;	/* serial number */
+} __packed;
+
+
+/*
+ * Driver specific state.
+ */
+
+enum hv_pcibus_state {
+	hv_pcibus_init = 0,
+	hv_pcibus_probed,
+	hv_pcibus_installed,
+	hv_pcibus_maximum
+};
+
+struct hv_pcibus_device {
+	struct pci_sysdata sysdata;
+	enum hv_pcibus_state state;
+	atomic_t remove_lock;
+	struct hv_device *hdev;
+	resource_size_t low_mmio_space;
+	resource_size_t high_mmio_space;
+	struct resource *mem_config;
+	struct resource *low_mmio_res;
+	struct resource *high_mmio_res;
+	struct completion *survey_event;
+	struct completion remove_event;
+	struct pci_bus *pci_bus;
+	spinlock_t config_lock;	/* Avoid two threads writing index page */
+	spinlock_t device_list_lock;	/* Protect lists below */
+	void __iomem *cfg_addr;
+
+	struct semaphore enum_sem;
+	struct list_head resources_for_children;
+
+	struct list_head children;
+	struct list_head dr_list;
+
+	struct msi_domain_info msi_info;
+	struct msi_controller msi_chip;
+	struct irq_domain *irq_domain;
+};
+
+enum hv_pcichild_state {
+	hv_pcichild_init = 0,
+	hv_pcichild_requirements,
+	hv_pcichild_resourced,
+	hv_pcichild_ejecting,
+	hv_pcichild_maximum
+};
+
+struct hv_pci_dev {
+	/* List protected by pci_rescan_remove_lock */
+	struct list_head list_entry;
+	atomic_t refs;
+	enum hv_pcichild_state state;
+	struct pci_function_description desc;
+	bool reported_missing;
+	struct hv_pcibus_device *hbus;
+	struct work_struct wrk;
+
+	/*
+	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
+	 * read it back, for each of the BAR offsets within config space.
+	 */
+	u32 probed_bar[6];
+};
 
 #endif /* _HYPERV_H */
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/3] hyperv: Add a function to detect if the device is a vmbus dev
  2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
@ 2016-12-08  8:33   ` kys
  2016-12-08  8:33   ` [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers kys
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 29+ messages in thread
From: kys @ 2016-12-08  8:33 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas
  Cc: Haiyang Zhang, K. Y. Srinivasan

From: Haiyang Zhang <haiyangz@microsoft.com>

On Hyper-V, every VF interface has a corresponding synthetic
interface managed by netvsc that share the same MAC
address. netvsc registers for netdev events to manage this
association. Currently we use the MAC address to manage this
association but going forward, we want to use a serial number that
the host publishes. To do this we need functionality similar
to dev_is_pci() for the vmbus devices. Implement such a function.

This function will be used in subsequent patches to netvsc and in the
interest of eliminating cross tree dependencies, this patch is being
submitted first.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/hv/vmbus_drv.c |    6 ++++++
 include/linux/hyperv.h |    2 ++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 230c62e..64c40f3 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -845,6 +845,12 @@ struct onmessage_work_context {
 	struct hv_message msg;
 };
 
+bool dev_is_vmbus(struct device *dev)
+{
+	return dev->bus == &hv_bus;
+}
+EXPORT_SYMBOL_GPL(dev_is_vmbus);
+
 static void vmbus_onmessage_work(struct work_struct *work)
 {
 	struct onmessage_work_context *ctx;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ff6cd3e..2abb1bf 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -947,6 +947,8 @@ static inline void clear_low_latency_mode(struct vmbus_channel *c)
 	c->low_latency = false;
 }
 
+bool dev_is_vmbus(struct device *dev);
+
 void vmbus_onmessage(void *context);
 
 int vmbus_request_offers(void);
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
  2016-12-08  8:33   ` [PATCH 2/3] hyperv: Add a function to detect if the device is a vmbus dev kys
@ 2016-12-08  8:33   ` kys
  2016-12-08 15:56     ` Greg KH
  2016-12-08  9:49   ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kbuild test robot
  2016-12-08 11:15   ` kbuild test robot
  3 siblings, 1 reply; 29+ messages in thread
From: kys @ 2016-12-08  8:33 UTC (permalink / raw)
  To: gregkh, linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas
  Cc: Haiyang Zhang, K. Y. Srinivasan

From: Haiyang Zhang <haiyangz@microsoft.com>

We currently use MAC address to match VF and synthetic NICs. Hyper-V
provides a serial number to both devices for this purpose. This patch
implements the matching based on VF serial numbers. This is the way
specified by the protocol and more reliable.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c |   55 ++++++++++++++++++++++++++++++++++++---
 1 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 9522763..c5778cf 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct net_device *netdev)
 	free_netdev(netdev);
 }
 
-static struct net_device *get_netvsc_bymac(const u8 *mac)
+static struct net_device *get_netvsc_byvfser(u32 vfser)
 {
 	struct net_device *dev;
+	struct net_device_context *ndev_ctx;
 
 	ASSERT_RTNL();
 
@@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct net_device *netdev)
 		if (dev->netdev_ops != &device_ops)
 			continue;	/* not a netvsc device */
 
-		if (ether_addr_equal(mac, dev->perm_addr))
+		ndev_ctx = netdev_priv(dev);
+		if (ndev_ctx->vf_serial == vfser)
 			return dev;
 	}
 
@@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct net_device *netdev)
 	return NULL;
 }
 
+static u32 netvsc_get_vfser(struct net_device *vf_netdev)
+{
+	struct device *dev;
+	struct hv_device *hdev;
+	struct hv_pcibus_device *hbus = NULL;
+	struct list_head *iter;
+	struct hv_pci_dev *hpdev;
+	unsigned long flags;
+	u32 vfser = 0;
+	u32 count = 0;
+
+	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
+		if (!dev_is_vmbus(dev))
+			continue;
+
+		hdev = device_to_hv_device(dev);
+		if (hdev->device_id != HV_PCIE)
+			continue;
+
+		hbus = hv_get_drvdata(hdev);
+		break;
+	}
+
+	if (!hbus)
+		return 0;
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	list_for_each(iter, &hbus->children) {
+		hpdev = container_of(iter, struct hv_pci_dev, list_entry);
+		vfser = hpdev->desc.ser;
+		count++;
+	}
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+
+	if (count == 1)
+		return vfser;
+
+	return 0;
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device *netvsc_dev;
+	u32 vfser;
 
 	if (vf_netdev->addr_len != ETH_ALEN)
 		return NOTIFY_DONE;
 
+	vfser = netvsc_get_vfser(vf_netdev);
+	if (!vfser)
+		return NOTIFY_DONE;
+
 	/*
-	 * We will use the MAC address to locate the synthetic interface to
+	 * We will use the VF serial to locate the synthetic interface to
 	 * associate with the VF interface. If we don't find a matching
 	 * synthetic interface, move on.
 	 */
-	ndev = get_netvsc_bymac(vf_netdev->perm_addr);
+	ndev = get_netvsc_byvfser(vfser);
 	if (!ndev)
 		return NOTIFY_DONE;
 
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
  2016-12-08  8:33   ` [PATCH 2/3] hyperv: Add a function to detect if the device is a vmbus dev kys
  2016-12-08  8:33   ` [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers kys
@ 2016-12-08  9:49   ` kbuild test robot
  2016-12-08 15:26     ` KY Srinivasan
  2016-12-08 11:15   ` kbuild test robot
  3 siblings, 1 reply; 29+ messages in thread
From: kbuild test robot @ 2016-12-08  9:49 UTC (permalink / raw)
  To: kys
  Cc: kbuild-all, gregkh, linux-kernel, devel, olaf, apw, vkuznets,
	jasowang, leann.ogasawara, bjorn.helgaas, Haiyang Zhang

[-- Attachment #1: Type: text/plain, Size: 1374 bytes --]

Hi Haiyang,

[auto build test ERROR on next-20161208]
[also build test ERROR on v4.9-rc8]
[cannot apply to linus/master linux/master pci/next v4.9-rc8 v4.9-rc7 v4.9-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/kys-exchange-microsoft-com/hyperv-Move-hv_pci_dev-and-related-structs-to-hyperv-h/20161208-171643
config: x86_64-randconfig-x012-201649 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   In file included from drivers/hid/hid-hyperv.c:22:0:
>> include/linux/hyperv.h:1660:25: error: field 'msi_info' has incomplete type
     struct msi_domain_info msi_info;
                            ^~~~~~~~

vim +/msi_info +1660 include/linux/hyperv.h

  1654		struct semaphore enum_sem;
  1655		struct list_head resources_for_children;
  1656	
  1657		struct list_head children;
  1658		struct list_head dr_list;
  1659	
> 1660		struct msi_domain_info msi_info;
  1661		struct msi_controller msi_chip;
  1662		struct irq_domain *irq_domain;
  1663	};

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28304 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
                     ` (2 preceding siblings ...)
  2016-12-08  9:49   ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kbuild test robot
@ 2016-12-08 11:15   ` kbuild test robot
  3 siblings, 0 replies; 29+ messages in thread
From: kbuild test robot @ 2016-12-08 11:15 UTC (permalink / raw)
  To: kys
  Cc: kbuild-all, gregkh, linux-kernel, devel, olaf, apw, vkuznets,
	jasowang, leann.ogasawara, bjorn.helgaas, Haiyang Zhang

[-- Attachment #1: Type: text/plain, Size: 1444 bytes --]

Hi Haiyang,

[auto build test ERROR on next-20161208]
[also build test ERROR on v4.9-rc8]
[cannot apply to linus/master linux/master pci/next v4.9-rc8 v4.9-rc7 v4.9-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/kys-exchange-microsoft-com/hyperv-Move-hv_pci_dev-and-related-structs-to-hyperv-h/20161208-171643
config: x86_64-allyesdebian (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   In file included from drivers/input/serio/hyperv-keyboard.c:18:0:
>> include/linux/hyperv.h:1654:19: error: field 'enum_sem' has incomplete type
     struct semaphore enum_sem;
                      ^~~~~~~~

vim +/enum_sem +1654 include/linux/hyperv.h

  1648		struct completion remove_event;
  1649		struct pci_bus *pci_bus;
  1650		spinlock_t config_lock;	/* Avoid two threads writing index page */
  1651		spinlock_t device_list_lock;	/* Protect lists below */
  1652		void __iomem *cfg_addr;
  1653	
> 1654		struct semaphore enum_sem;
  1655		struct list_head resources_for_children;
  1656	
  1657		struct list_head children;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 38037 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08  9:49   ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kbuild test robot
@ 2016-12-08 15:26     ` KY Srinivasan
  2016-12-08 15:56       ` gregkh
  0 siblings, 1 reply; 29+ messages in thread
From: KY Srinivasan @ 2016-12-08 15:26 UTC (permalink / raw)
  To: kbuild test robot, kys
  Cc: olaf, gregkh, jasowang, Haiyang Zhang, linux-kernel,
	bjorn.helgaas, kbuild-all, apw, devel, leann.ogasawara



> -----Original Message-----
> From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org] On
> Behalf Of kbuild test robot
> Sent: Thursday, December 8, 2016 1:50 AM
> To: kys@exchange.microsoft.com
> Cc: olaf@aepfle.de; gregkh@linuxfoundation.org; jasowang@redhat.com;
> Haiyang Zhang <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; kbuild-all@01.org; apw@canonical.com;
> devel@linuxdriverproject.org; leann.ogasawara@canonical.com
> Subject: Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to
> hyperv.h
> 
> Hi Haiyang,
> 
> [auto build test ERROR on next-20161208]
> [also build test ERROR on v4.9-rc8]
> [cannot apply to linus/master linux/master pci/next v4.9-rc8 v4.9-rc7 v4.9-
> rc6]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system]
> 
> url:
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub
> .com%2F0day-ci%2Flinux%2Fcommits%2Fkys-exchange-microsoft-
> com%2Fhyperv-Move-hv_pci_dev-and-related-structs-to-hyperv-
> h%2F20161208-
> 171643&data=02%7C01%7Ckys%40microsoft.com%7Ca000870068f645a67492
> 08d41f4fa963%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C6361678
> 75686353145&sdata=2cR2dMEa6sjz94NYUFMx5qC8hEVvXF6uokmCCny%2FgL
> E%3D&reserved=0
> config: x86_64-randconfig-x012-201649 (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
>         # save the attached .config to linux build tree
>         make ARCH=x86_64
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from drivers/hid/hid-hyperv.c:22:0:
> >> include/linux/hyperv.h:1660:25: error: field 'msi_info' has incomplete type
>      struct msi_domain_info msi_info;
>                             ^~~~~~~~
> 
> vim +/msi_info +1660 include/linux/hyperv.h
> 
>   1654		struct semaphore enum_sem;
>   1655		struct list_head resources_for_children;
>   1656
>   1657		struct list_head children;
>   1658		struct list_head dr_list;
>   1659
> > 1660		struct msi_domain_info msi_info;
>   1661		struct msi_controller msi_chip;
>   1662		struct irq_domain *irq_domain;
>   1663	};
> 


Greg,

I sent this patch set to show how we plan to use the API to check if a device is a 
VMBUS device. Given that multiple trees are involved, the patches were based on
the linux-next tree. I was planning on submitting these patches with minimal
inter-tree dependency. 

Regards,

K. Y
> ---
> 0-DAY kernel test infrastructure                Open Source Technology Center
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.01
> .org%2Fpipermail%2Fkbuild-
> all&data=02%7C01%7Ckys%40microsoft.com%7Ca000870068f645a6749208d4
> 1f4fa963%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C63616787568
> 6353145&sdata=5T776prLOj7qzsQa%2B3Giit1UFvZJ6D%2FdL0I8k1qKfIg%3D&r
> eserved=0                   Intel Corporation

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-08  8:33   ` [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers kys
@ 2016-12-08 15:56     ` Greg KH
  2016-12-09  0:05       ` KY Srinivasan
  0 siblings, 1 reply; 29+ messages in thread
From: Greg KH @ 2016-12-08 15:56 UTC (permalink / raw)
  To: kys
  Cc: linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas, Haiyang Zhang

On Thu, Dec 08, 2016 at 12:33:43AM -0800, kys@exchange.microsoft.com wrote:
> From: Haiyang Zhang <haiyangz@microsoft.com>
> 
> We currently use MAC address to match VF and synthetic NICs. Hyper-V
> provides a serial number to both devices for this purpose. This patch
> implements the matching based on VF serial numbers. This is the way
> specified by the protocol and more reliable.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/net/hyperv/netvsc_drv.c |   55 ++++++++++++++++++++++++++++++++++++---
>  1 files changed, 51 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index 9522763..c5778cf 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct net_device *netdev)
>  	free_netdev(netdev);
>  }
>  
> -static struct net_device *get_netvsc_bymac(const u8 *mac)
> +static struct net_device *get_netvsc_byvfser(u32 vfser)
>  {
>  	struct net_device *dev;
> +	struct net_device_context *ndev_ctx;
>  
>  	ASSERT_RTNL();
>  
> @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct net_device *netdev)
>  		if (dev->netdev_ops != &device_ops)
>  			continue;	/* not a netvsc device */
>  
> -		if (ether_addr_equal(mac, dev->perm_addr))
> +		ndev_ctx = netdev_priv(dev);
> +		if (ndev_ctx->vf_serial == vfser)
>  			return dev;
>  	}
>  
> @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct net_device *netdev)
>  	return NULL;
>  }
>  
> +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> +{
> +	struct device *dev;
> +	struct hv_device *hdev;
> +	struct hv_pcibus_device *hbus = NULL;
> +	struct list_head *iter;
> +	struct hv_pci_dev *hpdev;
> +	unsigned long flags;
> +	u32 vfser = 0;
> +	u32 count = 0;
> +
> +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {

You are going to walk the whole device tree backwards?  That's crazy.
And foolish.  And racy and broken (what happens if the tree changes
while you do this?)  Where is the lock being grabbed while this happens?
What about reference counts?  Do you see other drivers ever doing this
(if you do, point them out and I'll go yell at them too...)

> +		if (!dev_is_vmbus(dev))
> +			continue;

Ick.

Why isn't your parent pointer a vmbus device all the time?  How could
you get burried down in the device hierarchy when you are the driver for
a specific bus type in the first place?  How could this function ever be
called for a device that is NOT of this type?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08 15:26     ` KY Srinivasan
@ 2016-12-08 15:56       ` gregkh
  2016-12-08 16:54         ` KY Srinivasan
  0 siblings, 1 reply; 29+ messages in thread
From: gregkh @ 2016-12-08 15:56 UTC (permalink / raw)
  To: KY Srinivasan
  Cc: kbuild test robot, kys, olaf, jasowang, Haiyang Zhang,
	linux-kernel, bjorn.helgaas, kbuild-all, apw, devel,
	leann.ogasawara

On Thu, Dec 08, 2016 at 03:26:06PM +0000, KY Srinivasan wrote:
> 
> 
> > -----Original Message-----
> > From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org] On
> > Behalf Of kbuild test robot
> > Sent: Thursday, December 8, 2016 1:50 AM
> > To: kys@exchange.microsoft.com
> > Cc: olaf@aepfle.de; gregkh@linuxfoundation.org; jasowang@redhat.com;
> > Haiyang Zhang <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; kbuild-all@01.org; apw@canonical.com;
> > devel@linuxdriverproject.org; leann.ogasawara@canonical.com
> > Subject: Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to
> > hyperv.h
> > 
> > Hi Haiyang,
> > 
> > [auto build test ERROR on next-20161208]
> > [also build test ERROR on v4.9-rc8]
> > [cannot apply to linus/master linux/master pci/next v4.9-rc8 v4.9-rc7 v4.9-
> > rc6]
> > [if your patch is applied to the wrong git tree, please drop us a note to help
> > improve the system]
> > 
> > url:
> > https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub
> > .com%2F0day-ci%2Flinux%2Fcommits%2Fkys-exchange-microsoft-
> > com%2Fhyperv-Move-hv_pci_dev-and-related-structs-to-hyperv-
> > h%2F20161208-
> > 171643&data=02%7C01%7Ckys%40microsoft.com%7Ca000870068f645a67492
> > 08d41f4fa963%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C6361678
> > 75686353145&sdata=2cR2dMEa6sjz94NYUFMx5qC8hEVvXF6uokmCCny%2FgL
> > E%3D&reserved=0
> > config: x86_64-randconfig-x012-201649 (attached as .config)
> > compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> > reproduce:
> >         # save the attached .config to linux build tree
> >         make ARCH=x86_64
> > 
> > All errors (new ones prefixed by >>):
> > 
> >    In file included from drivers/hid/hid-hyperv.c:22:0:
> > >> include/linux/hyperv.h:1660:25: error: field 'msi_info' has incomplete type
> >      struct msi_domain_info msi_info;
> >                             ^~~~~~~~
> > 
> > vim +/msi_info +1660 include/linux/hyperv.h
> > 
> >   1654		struct semaphore enum_sem;
> >   1655		struct list_head resources_for_children;
> >   1656
> >   1657		struct list_head children;
> >   1658		struct list_head dr_list;
> >   1659
> > > 1660		struct msi_domain_info msi_info;
> >   1661		struct msi_controller msi_chip;
> >   1662		struct irq_domain *irq_domain;
> >   1663	};
> > 
> 
> 
> Greg,
> 
> I sent this patch set to show how we plan to use the API to check if a device is a 
> VMBUS device. Given that multiple trees are involved, the patches were based on
> the linux-next tree. I was planning on submitting these patches with minimal
> inter-tree dependency. 

Ok, but how does this simple patch fail so badly?  Maybe just wait for
after 4.10-rc1 is out and then try this as everything will be synced up
then?  It's pretty much past the merge window for all subsystems now
anyway...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h
  2016-12-08 15:56       ` gregkh
@ 2016-12-08 16:54         ` KY Srinivasan
  0 siblings, 0 replies; 29+ messages in thread
From: KY Srinivasan @ 2016-12-08 16:54 UTC (permalink / raw)
  To: gregkh
  Cc: kbuild test robot, kys, olaf, jasowang, Haiyang Zhang,
	linux-kernel, bjorn.helgaas, kbuild-all, apw, devel,
	leann.ogasawara



> -----Original Message-----
> From: gregkh@linuxfoundation.org [mailto:gregkh@linuxfoundation.org]
> Sent: Thursday, December 8, 2016 7:57 AM
> To: KY Srinivasan <kys@microsoft.com>
> Cc: kbuild test robot <lkp@intel.com>; kys@exchange.microsoft.com;
> olaf@aepfle.de; jasowang@redhat.com; Haiyang Zhang
> <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; kbuild-all@01.org; apw@canonical.com;
> devel@linuxdriverproject.org; leann.ogasawara@canonical.com
> Subject: Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to
> hyperv.h
> 
> On Thu, Dec 08, 2016 at 03:26:06PM +0000, KY Srinivasan wrote:
> >
> >
> > > -----Original Message-----
> > > From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org]
> On
> > > Behalf Of kbuild test robot
> > > Sent: Thursday, December 8, 2016 1:50 AM
> > > To: kys@exchange.microsoft.com
> > > Cc: olaf@aepfle.de; gregkh@linuxfoundation.org;
> jasowang@redhat.com;
> > > Haiyang Zhang <haiyangz@microsoft.com>; linux-
> kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; kbuild-all@01.org; apw@canonical.com;
> > > devel@linuxdriverproject.org; leann.ogasawara@canonical.com
> > > Subject: Re: [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to
> > > hyperv.h
> > >
> > > Hi Haiyang,
> > >
> > > [auto build test ERROR on next-20161208]
> > > [also build test ERROR on v4.9-rc8]
> > > [cannot apply to linus/master linux/master pci/next v4.9-rc8 v4.9-rc7
> v4.9-
> > > rc6]
> > > [if your patch is applied to the wrong git tree, please drop us a note to
> help
> > > improve the system]
> > >
> > > url:
> > >
> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub
> > > .com%2F0day-ci%2Flinux%2Fcommits%2Fkys-exchange-microsoft-
> > > com%2Fhyperv-Move-hv_pci_dev-and-related-structs-to-hyperv-
> > > h%2F20161208-
> > >
> 171643&data=02%7C01%7Ckys%40microsoft.com%7Ca000870068f645a67492
> > >
> 08d41f4fa963%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C6361678
> > >
> 75686353145&sdata=2cR2dMEa6sjz94NYUFMx5qC8hEVvXF6uokmCCny%2FgL
> > > E%3D&reserved=0
> > > config: x86_64-randconfig-x012-201649 (attached as .config)
> > > compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> > > reproduce:
> > >         # save the attached .config to linux build tree
> > >         make ARCH=x86_64
> > >
> > > All errors (new ones prefixed by >>):
> > >
> > >    In file included from drivers/hid/hid-hyperv.c:22:0:
> > > >> include/linux/hyperv.h:1660:25: error: field 'msi_info' has incomplete
> type
> > >      struct msi_domain_info msi_info;
> > >                             ^~~~~~~~
> > >
> > > vim +/msi_info +1660 include/linux/hyperv.h
> > >
> > >   1654		struct semaphore enum_sem;
> > >   1655		struct list_head resources_for_children;
> > >   1656
> > >   1657		struct list_head children;
> > >   1658		struct list_head dr_list;
> > >   1659
> > > > 1660		struct msi_domain_info msi_info;
> > >   1661		struct msi_controller msi_chip;
> > >   1662		struct irq_domain *irq_domain;
> > >   1663	};
> > >
> >
> >
> > Greg,
> >
> > I sent this patch set to show how we plan to use the API to check if a device
> is a
> > VMBUS device. Given that multiple trees are involved, the patches were
> based on
> > the linux-next tree. I was planning on submitting these patches with
> minimal
> > inter-tree dependency.
> 
> Ok, but how does this simple patch fail so badly?  Maybe just wait for
> after 4.10-rc1 is out and then try this as everything will be synced up
> then?  It's pretty much past the merge window for all subsystems now
> anyway...

Will do.

Thanks,

K. Y

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-08 15:56     ` Greg KH
@ 2016-12-09  0:05       ` KY Srinivasan
  2016-12-09  7:31         ` Greg KH
  0 siblings, 1 reply; 29+ messages in thread
From: KY Srinivasan @ 2016-12-09  0:05 UTC (permalink / raw)
  To: Greg KH
  Cc: linux-kernel, devel, olaf, apw, vkuznets, jasowang,
	leann.ogasawara, bjorn.helgaas, Haiyang Zhang



> -----Original Message-----
> From: Greg KH [mailto:gregkh@linuxfoundation.org]
> Sent: Thursday, December 8, 2016 7:56 AM
> To: KY Srinivasan <kys@microsoft.com>
> Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> jasowang@redhat.com; leann.ogasawara@canonical.com;
> bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial
> numbers
> 
> On Thu, Dec 08, 2016 at 12:33:43AM -0800, kys@exchange.microsoft.com
> wrote:
> > From: Haiyang Zhang <haiyangz@microsoft.com>
> >
> > We currently use MAC address to match VF and synthetic NICs. Hyper-V
> > provides a serial number to both devices for this purpose. This patch
> > implements the matching based on VF serial numbers. This is the way
> > specified by the protocol and more reliable.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > ---
> >  drivers/net/hyperv/netvsc_drv.c |   55
> ++++++++++++++++++++++++++++++++++++---
> >  1 files changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/hyperv/netvsc_drv.c
> b/drivers/net/hyperv/netvsc_drv.c
> > index 9522763..c5778cf 100644
> > --- a/drivers/net/hyperv/netvsc_drv.c
> > +++ b/drivers/net/hyperv/netvsc_drv.c
> > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct
> net_device *netdev)
> >  	free_netdev(netdev);
> >  }
> >
> > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> >  {
> >  	struct net_device *dev;
> > +	struct net_device_context *ndev_ctx;
> >
> >  	ASSERT_RTNL();
> >
> > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct net_device
> *netdev)
> >  		if (dev->netdev_ops != &device_ops)
> >  			continue;	/* not a netvsc device */
> >
> > -		if (ether_addr_equal(mac, dev->perm_addr))
> > +		ndev_ctx = netdev_priv(dev);
> > +		if (ndev_ctx->vf_serial == vfser)
> >  			return dev;
> >  	}
> >
> > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct
> net_device *netdev)
> >  	return NULL;
> >  }
> >
> > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > +{
> > +	struct device *dev;
> > +	struct hv_device *hdev;
> > +	struct hv_pcibus_device *hbus = NULL;
> > +	struct list_head *iter;
> > +	struct hv_pci_dev *hpdev;
> > +	unsigned long flags;
> > +	u32 vfser = 0;
> > +	u32 count = 0;
> > +
> > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
> 
> You are going to walk the whole device tree backwards?  That's crazy.
> And foolish.  And racy and broken (what happens if the tree changes
> while you do this?)  Where is the lock being grabbed while this happens?
> What about reference counts?  Do you see other drivers ever doing this
> (if you do, point them out and I'll go yell at them too...)

Greg,

We are registering for netdev events. Coming into this function, the caller
guarantees that the list of netdevs does not change - we assert this on entry:
ASSERT_RTNL(). We are only walking up the device tree for the netdevs whose
state change is being notified to us - the device tree being walked here is limited to
netdevs under question. 

We have a reference to the device and we know the device is not going away. Is it not
safe to dereference the parent pointer - after all the child has taken a reference on
the parent as part of  device_add() call.
 
> 
> > +		if (!dev_is_vmbus(dev))
> > +			continue;
> 
> Ick.
> 
> Why isn't your parent pointer a vmbus device all the time?  How could
> you get burried down in the device hierarchy when you are the driver for
> a specific bus type in the first place?  How could this function ever be
> called for a device that is NOT of this type?

We get notified when state changes on any of the netdev devices in the system.
Not all netdevs in the system belong to vmbus. Consider for instance the 
emulated NIC that can be configured. This is an emulated PCI NIC. We are only
interested in netdevs that correspond to the VF instance that we are interested in.

Regards,

K. Y

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09  0:05       ` KY Srinivasan
@ 2016-12-09  7:31         ` Greg KH
  2016-12-09 18:20           ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Greg KH @ 2016-12-09  7:31 UTC (permalink / raw)
  To: KY Srinivasan
  Cc: olaf, jasowang, Haiyang Zhang, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara

On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:
> 
> 
> > -----Original Message-----
> > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > Sent: Thursday, December 8, 2016 7:56 AM
> > To: KY Srinivasan <kys@microsoft.com>
> > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial
> > numbers
> > 
> > On Thu, Dec 08, 2016 at 12:33:43AM -0800, kys@exchange.microsoft.com
> > wrote:
> > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > >
> > > We currently use MAC address to match VF and synthetic NICs. Hyper-V
> > > provides a serial number to both devices for this purpose. This patch
> > > implements the matching based on VF serial numbers. This is the way
> > > specified by the protocol and more reliable.
> > >
> > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > ---
> > >  drivers/net/hyperv/netvsc_drv.c |   55
> > ++++++++++++++++++++++++++++++++++++---
> > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > b/drivers/net/hyperv/netvsc_drv.c
> > > index 9522763..c5778cf 100644
> > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct
> > net_device *netdev)
> > >  	free_netdev(netdev);
> > >  }
> > >
> > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > >  {
> > >  	struct net_device *dev;
> > > +	struct net_device_context *ndev_ctx;
> > >
> > >  	ASSERT_RTNL();
> > >
> > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct net_device
> > *netdev)
> > >  		if (dev->netdev_ops != &device_ops)
> > >  			continue;	/* not a netvsc device */
> > >
> > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > +		ndev_ctx = netdev_priv(dev);
> > > +		if (ndev_ctx->vf_serial == vfser)
> > >  			return dev;
> > >  	}
> > >
> > > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct
> > net_device *netdev)
> > >  	return NULL;
> > >  }
> > >
> > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > +{
> > > +	struct device *dev;
> > > +	struct hv_device *hdev;
> > > +	struct hv_pcibus_device *hbus = NULL;
> > > +	struct list_head *iter;
> > > +	struct hv_pci_dev *hpdev;
> > > +	unsigned long flags;
> > > +	u32 vfser = 0;
> > > +	u32 count = 0;
> > > +
> > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
> > 
> > You are going to walk the whole device tree backwards?  That's crazy.
> > And foolish.  And racy and broken (what happens if the tree changes
> > while you do this?)  Where is the lock being grabbed while this happens?
> > What about reference counts?  Do you see other drivers ever doing this
> > (if you do, point them out and I'll go yell at them too...)
> 
> Greg,
> 
> We are registering for netdev events. Coming into this function, the caller
> guarantees that the list of netdevs does not change - we assert this on entry:
> ASSERT_RTNL(). We are only walking up the device tree for the netdevs whose
> state change is being notified to us - the device tree being walked here is limited to
> netdevs under question. 

But a netdev is a child of some type of "real" device, and you are now
walking the tree of all devices up to the "root" parent device, which
means you will hit PCI bridges, USB controllers, and all sorts of fun
things if you are a child of those types of devices.

And can't you tell if the netdev for this event, really is "your"
netdev?  Or are you getting called this for "all" netdevs?  Sorry, I
don't know this api, any pointers to it would be appreciated.

> We have a reference to the device and we know the device is not going away. Is it not
> safe to dereference the parent pointer - after all the child has taken a reference on
> the parent as part of  device_add() call.

It might be, and might not be.  There's a reason you don't see this
pattern anywhere in the kernel because of this...

> > > +		if (!dev_is_vmbus(dev))
> > > +			continue;
> > 
> > Ick.
> > 
> > Why isn't your parent pointer a vmbus device all the time?  How could
> > you get burried down in the device hierarchy when you are the driver for
> > a specific bus type in the first place?  How could this function ever be
> > called for a device that is NOT of this type?
> 
> We get notified when state changes on any of the netdev devices in the system.
> Not all netdevs in the system belong to vmbus. Consider for instance the 
> emulated NIC that can be configured. This is an emulated PCI NIC. We are only
> interested in netdevs that correspond to the VF instance that we are interested in.

Can you "know" this is your netdev by some other way than having to walk
the device tree?  Name?  local device type?  Something else?  This seems
like an odd api in that everyone would have to do gyrations like this in
order to determine if the netdev is "theirs" or not...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09  7:31         ` Greg KH
@ 2016-12-09 18:20           ` Stephen Hemminger
  2016-12-09 20:09             ` Haiyang Zhang
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-09 18:20 UTC (permalink / raw)
  To: Greg KH
  Cc: KY Srinivasan, olaf, Haiyang Zhang, linux-kernel, bjorn.helgaas,
	apw, devel, leann.ogasawara, jasowang

On Fri, 9 Dec 2016 08:31:22 +0100
Greg KH <gregkh@linuxfoundation.org> wrote:

> On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:
> > 
> >   
> > > -----Original Message-----
> > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > Sent: Thursday, December 8, 2016 7:56 AM
> > > To: KY Srinivasan <kys@microsoft.com>
> > > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial
> > > numbers
> > > 
> > > On Thu, Dec 08, 2016 at 12:33:43AM -0800, kys@exchange.microsoft.com
> > > wrote:  
> > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > >
> > > > We currently use MAC address to match VF and synthetic NICs. Hyper-V
> > > > provides a serial number to both devices for this purpose. This patch
> > > > implements the matching based on VF serial numbers. This is the way
> > > > specified by the protocol and more reliable.
> > > >
> > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > ---
> > > >  drivers/net/hyperv/netvsc_drv.c |   55  
> > > ++++++++++++++++++++++++++++++++++++---  
> > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/net/hyperv/netvsc_drv.c  
> > > b/drivers/net/hyperv/netvsc_drv.c  
> > > > index 9522763..c5778cf 100644
> > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct  
> > > net_device *netdev)  
> > > >  	free_netdev(netdev);
> > > >  }
> > > >
> > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > >  {
> > > >  	struct net_device *dev;
> > > > +	struct net_device_context *ndev_ctx;
> > > >
> > > >  	ASSERT_RTNL();
> > > >
> > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct net_device  
> > > *netdev)  
> > > >  		if (dev->netdev_ops != &device_ops)
> > > >  			continue;	/* not a netvsc device */
> > > >
> > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > +		ndev_ctx = netdev_priv(dev);
> > > > +		if (ndev_ctx->vf_serial == vfser)
> > > >  			return dev;
> > > >  	}
> > > >
> > > > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct  
> > > net_device *netdev)  
> > > >  	return NULL;
> > > >  }
> > > >
> > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > +{
> > > > +	struct device *dev;
> > > > +	struct hv_device *hdev;
> > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > +	struct list_head *iter;
> > > > +	struct hv_pci_dev *hpdev;
> > > > +	unsigned long flags;
> > > > +	u32 vfser = 0;
> > > > +	u32 count = 0;
> > > > +
> > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {  
> > > 
> > > You are going to walk the whole device tree backwards?  That's crazy.
> > > And foolish.  And racy and broken (what happens if the tree changes
> > > while you do this?)  Where is the lock being grabbed while this happens?
> > > What about reference counts?  Do you see other drivers ever doing this
> > > (if you do, point them out and I'll go yell at them too...)  
> > 
> > Greg,
> > 
> > We are registering for netdev events. Coming into this function, the caller
> > guarantees that the list of netdevs does not change - we assert this on entry:
> > ASSERT_RTNL(). We are only walking up the device tree for the netdevs whose
> > state change is being notified to us - the device tree being walked here is limited to
> > netdevs under question.   
> 
> But a netdev is a child of some type of "real" device, and you are now
> walking the tree of all devices up to the "root" parent device, which
> means you will hit PCI bridges, USB controllers, and all sorts of fun
> things if you are a child of those types of devices.
> 
> And can't you tell if the netdev for this event, really is "your"
> netdev?  Or are you getting called this for "all" netdevs?  Sorry, I
> don't know this api, any pointers to it would be appreciated.
> 
> > We have a reference to the device and we know the device is not going away. Is it not
> > safe to dereference the parent pointer - after all the child has taken a reference on
> > the parent as part of  device_add() call.  
> 
> It might be, and might not be.  There's a reason you don't see this
> pattern anywhere in the kernel because of this...
> 
> > > > +		if (!dev_is_vmbus(dev))
> > > > +			continue;  
> > > 
> > > Ick.
> > > 
> > > Why isn't your parent pointer a vmbus device all the time?  How could
> > > you get burried down in the device hierarchy when you are the driver for
> > > a specific bus type in the first place?  How could this function ever be
> > > called for a device that is NOT of this type?  
> > 
> > We get notified when state changes on any of the netdev devices in the system.
> > Not all netdevs in the system belong to vmbus. Consider for instance the 
> > emulated NIC that can be configured. This is an emulated PCI NIC. We are only
> > interested in netdevs that correspond to the VF instance that we are interested in.  
> 
> Can you "know" this is your netdev by some other way than having to walk
> the device tree?  Name?  local device type?  Something else?  This seems
> like an odd api in that everyone would have to do gyrations like this in
> order to determine if the netdev is "theirs" or not...

The scenario is SR-IOV on Hyper-V. In the case of VF device, the host hands the
guest OS a PCI device for the virtual function device. The VF device is placed
on a special synthetic PCI bus (ie not part of the other buses on the system).
The VF device also has a synthetic network interface (netvsc) which lives
on VMBUS.  This code is about managing the interaction between the two.

The association between VF and synthetic NIC is done in response to the
VF network device being registered. Initial version was based on MAC address
which is the same.  Later refinement used permanent MAC address to
avoid bugs if MAC address changed.  This version is to use serial number
instead which is safer than MAC address.

The code to walk up/down maybe not be needed to find serial number.
Perhaps a more direct single set of conditions is possible?

Something like:

In pci-hyperv.c

u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int devfn)
{
	struct hv_pcibus_device *hbus
		= container_of(bus->sysdata,
			       struct hv_pcibus_device, sysdata);
	struct hf_pci_dev *hpdev;
	u32 serial;
	
	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
	if (!hpdev)
		return 0;

	serial = hpdev->devs.ser;
	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
	return serial;
}

In netvsc_drv.c

static u32 netvsc_get_vfser(struct net_device *vf_netdev)
{
	struct device *dev = vf_netdev->dev.parent;
	struct pci_dev *pdev;
	u32 wslot;

	if (!dev || !dev_is_pci(dev))
		return 0;

	pdev = container_of(dev, struct pci_device, dev);

	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
}





P.S: it would be good to be able to get win_slot out through sysfs as
well for systemd/udev.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 18:20           ` Stephen Hemminger
@ 2016-12-09 20:09             ` Haiyang Zhang
  2016-12-09 20:29               ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-09 20:09 UTC (permalink / raw)
  To: Stephen Hemminger, Greg KH
  Cc: KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw, devel,
	leann.ogasawara, jasowang



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, December 9, 2016 1:21 PM
> To: Greg KH <gregkh@linuxfoundation.org>
> Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang Zhang
> <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com; jasowang@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 08:31:22 +0100
> Greg KH <gregkh@linuxfoundation.org> wrote:
> 
> > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > To: KY Srinivasan <kys@microsoft.com>
> > > > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial
> > > > numbers
> > > >
> > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> kys@exchange.microsoft.com
> > > > wrote:
> > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > >
> > > > > We currently use MAC address to match VF and synthetic NICs.
> Hyper-V
> > > > > provides a serial number to both devices for this purpose. This
> patch
> > > > > implements the matching based on VF serial numbers. This is the
> way
> > > > > specified by the protocol and more reliable.
> > > > >
> > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > ---
> > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > ++++++++++++++++++++++++++++++++++++---
> > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > index 9522763..c5778cf 100644
> > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct
> > > > net_device *netdev)
> > > > >  	free_netdev(netdev);
> > > > >  }
> > > > >
> > > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > >  {
> > > > >  	struct net_device *dev;
> > > > > +	struct net_device_context *ndev_ctx;
> > > > >
> > > > >  	ASSERT_RTNL();
> > > > >
> > > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct
> net_device
> > > > *netdev)
> > > > >  		if (dev->netdev_ops != &device_ops)
> > > > >  			continue;	/* not a netvsc device */
> > > > >
> > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > >  			return dev;
> > > > >  	}
> > > > >
> > > > > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct
> > > > net_device *netdev)
> > > > >  	return NULL;
> > > > >  }
> > > > >
> > > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > +{
> > > > > +	struct device *dev;
> > > > > +	struct hv_device *hdev;
> > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > +	struct list_head *iter;
> > > > > +	struct hv_pci_dev *hpdev;
> > > > > +	unsigned long flags;
> > > > > +	u32 vfser = 0;
> > > > > +	u32 count = 0;
> > > > > +
> > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
> > > >
> > > > You are going to walk the whole device tree backwards?  That's
> crazy.
> > > > And foolish.  And racy and broken (what happens if the tree
> changes
> > > > while you do this?)  Where is the lock being grabbed while this
> happens?
> > > > What about reference counts?  Do you see other drivers ever doing
> this
> > > > (if you do, point them out and I'll go yell at them too...)
> > >
> > > Greg,
> > >
> > > We are registering for netdev events. Coming into this function, the
> caller
> > > guarantees that the list of netdevs does not change - we assert this
> on entry:
> > > ASSERT_RTNL(). We are only walking up the device tree for the
> netdevs whose
> > > state change is being notified to us - the device tree being walked
> here is limited to
> > > netdevs under question.
> >
> > But a netdev is a child of some type of "real" device, and you are now
> > walking the tree of all devices up to the "root" parent device, which
> > means you will hit PCI bridges, USB controllers, and all sorts of fun
> > things if you are a child of those types of devices.
> >
> > And can't you tell if the netdev for this event, really is "your"
> > netdev?  Or are you getting called this for "all" netdevs?  Sorry, I
> > don't know this api, any pointers to it would be appreciated.
> >
> > > We have a reference to the device and we know the device is not
> going away. Is it not
> > > safe to dereference the parent pointer - after all the child has
> taken a reference on
> > > the parent as part of  device_add() call.
> >
> > It might be, and might not be.  There's a reason you don't see this
> > pattern anywhere in the kernel because of this...
> >
> > > > > +		if (!dev_is_vmbus(dev))
> > > > > +			continue;
> > > >
> > > > Ick.
> > > >
> > > > Why isn't your parent pointer a vmbus device all the time?  How
> could
> > > > you get burried down in the device hierarchy when you are the
> driver for
> > > > a specific bus type in the first place?  How could this function
> ever be
> > > > called for a device that is NOT of this type?
> > >
> > > We get notified when state changes on any of the netdev devices in
> the system.
> > > Not all netdevs in the system belong to vmbus. Consider for instance
> the
> > > emulated NIC that can be configured. This is an emulated PCI NIC. We
> are only
> > > interested in netdevs that correspond to the VF instance that we are
> interested in.
> >
> > Can you "know" this is your netdev by some other way than having to
> walk
> > the device tree?  Name?  local device type?  Something else?  This
> seems
> > like an odd api in that everyone would have to do gyrations like this
> in
> > order to determine if the netdev is "theirs" or not...
> 
> The scenario is SR-IOV on Hyper-V. In the case of VF device, the host
> hands the
> guest OS a PCI device for the virtual function device. The VF device is
> placed
> on a special synthetic PCI bus (ie not part of the other buses on the
> system).
> The VF device also has a synthetic network interface (netvsc) which
> lives
> on VMBUS.  This code is about managing the interaction between the two.
> 
> The association between VF and synthetic NIC is done in response to the
> VF network device being registered. Initial version was based on MAC
> address
> which is the same.  Later refinement used permanent MAC address to
> avoid bugs if MAC address changed.  This version is to use serial number
> instead which is safer than MAC address.
> 
> The code to walk up/down maybe not be needed to find serial number.
> Perhaps a more direct single set of conditions is possible?
> 
> Something like:
> 
> In pci-hyperv.c
> 
> u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int devfn)
> {
> 	struct hv_pcibus_device *hbus
> 		= container_of(bus->sysdata,
> 			       struct hv_pcibus_device, sysdata);
> 	struct hf_pci_dev *hpdev;
> 	u32 serial;
> 
> 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> 	if (!hpdev)
> 		return 0;
> 
> 	serial = hpdev->devs.ser;
> 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> 	return serial;
> }
> 
> In netvsc_drv.c
> 
> static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> {
> 	struct device *dev = vf_netdev->dev.parent;
> 	struct pci_dev *pdev;
> 	u32 wslot;
> 
> 	if (!dev || !dev_is_pci(dev))
> 		return 0;
> 
> 	pdev = container_of(dev, struct pci_device, dev);
> 
> 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> }
> 
> 
> 
> 
> 
> P.S: it would be good to be able to get win_slot out through sysfs as
> well for systemd/udev.

Stephen,

Thanks for suggestion. Actually, in my earlier implementation of this 
feature (VF serial based matching), I thought about export a function
from vPCI driver, then calling it from netvsc. So I don't need to 
move structs between headers... But, it creates a dependency of netvsc
on vPCI driver's symbol. So, even if on a VM without SRIOV, we have to
load vPCI driver, which we don't want.

Also, hv_vpci device is 3 parent layers above the vf_netdevice:
Here is the VF drv hierarchy --
Should we assume it's always 3 parents above vf_netdevice, or search for it?

[  368.185259] HZINFO:NETDEV_REGISTER:
[  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null), busName:(null), drvName:(null)
[  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60, busName:pci, drvName:ixgbevf
[  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null), busName:(null), drvName:(null)
[  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160, busName:vmbus, drvName:hv_pci
[  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800, busName:acpi, drvName:vmbus
[  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
[  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
[  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
[  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 20:09             ` Haiyang Zhang
@ 2016-12-09 20:29               ` Stephen Hemminger
  2016-12-09 21:31                 ` Haiyang Zhang
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-09 20:29 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang

On Fri, 9 Dec 2016 20:09:49 +0000
Haiyang Zhang <haiyangz@microsoft.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Friday, December 9, 2016 1:21 PM
> > To: Greg KH <gregkh@linuxfoundation.org>
> > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang Zhang
> > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > leann.ogasawara@canonical.com; jasowang@redhat.com
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> > 
> > On Fri, 9 Dec 2016 08:31:22 +0100
> > Greg KH <gregkh@linuxfoundation.org> wrote:
> >   
> > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:  
> > > >
> > > >  
> > > > > -----Original Message-----
> > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on  
> > serial  
> > > > > numbers
> > > > >
> > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,  
> > kys@exchange.microsoft.com  
> > > > > wrote:  
> > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > >
> > > > > > We currently use MAC address to match VF and synthetic NICs.  
> > Hyper-V  
> > > > > > provides a serial number to both devices for this purpose. This  
> > patch  
> > > > > > implements the matching based on VF serial numbers. This is the  
> > way  
> > > > > > specified by the protocol and more reliable.
> > > > > >
> > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > ---
> > > > > >  drivers/net/hyperv/netvsc_drv.c |   55  
> > > > > ++++++++++++++++++++++++++++++++++++---  
> > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c  
> > > > > b/drivers/net/hyperv/netvsc_drv.c  
> > > > > > index 9522763..c5778cf 100644
> > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct  
> > > > > net_device *netdev)  
> > > > > >  	free_netdev(netdev);
> > > > > >  }
> > > > > >
> > > > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > >  {
> > > > > >  	struct net_device *dev;
> > > > > > +	struct net_device_context *ndev_ctx;
> > > > > >
> > > > > >  	ASSERT_RTNL();
> > > > > >
> > > > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct  
> > net_device  
> > > > > *netdev)  
> > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > >  			continue;	/* not a netvsc device */
> > > > > >
> > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > >  			return dev;
> > > > > >  	}
> > > > > >
> > > > > > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct  
> > > > > net_device *netdev)  
> > > > > >  	return NULL;
> > > > > >  }
> > > > > >
> > > > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > > +{
> > > > > > +	struct device *dev;
> > > > > > +	struct hv_device *hdev;
> > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > +	struct list_head *iter;
> > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > +	unsigned long flags;
> > > > > > +	u32 vfser = 0;
> > > > > > +	u32 count = 0;
> > > > > > +
> > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {  
> > > > >
> > > > > You are going to walk the whole device tree backwards?  That's  
> > crazy.  
> > > > > And foolish.  And racy and broken (what happens if the tree  
> > changes  
> > > > > while you do this?)  Where is the lock being grabbed while this  
> > happens?  
> > > > > What about reference counts?  Do you see other drivers ever doing  
> > this  
> > > > > (if you do, point them out and I'll go yell at them too...)  
> > > >
> > > > Greg,
> > > >
> > > > We are registering for netdev events. Coming into this function, the  
> > caller  
> > > > guarantees that the list of netdevs does not change - we assert this  
> > on entry:  
> > > > ASSERT_RTNL(). We are only walking up the device tree for the  
> > netdevs whose  
> > > > state change is being notified to us - the device tree being walked  
> > here is limited to  
> > > > netdevs under question.  
> > >
> > > But a netdev is a child of some type of "real" device, and you are now
> > > walking the tree of all devices up to the "root" parent device, which
> > > means you will hit PCI bridges, USB controllers, and all sorts of fun
> > > things if you are a child of those types of devices.
> > >
> > > And can't you tell if the netdev for this event, really is "your"
> > > netdev?  Or are you getting called this for "all" netdevs?  Sorry, I
> > > don't know this api, any pointers to it would be appreciated.
> > >  
> > > > We have a reference to the device and we know the device is not  
> > going away. Is it not  
> > > > safe to dereference the parent pointer - after all the child has  
> > taken a reference on  
> > > > the parent as part of  device_add() call.  
> > >
> > > It might be, and might not be.  There's a reason you don't see this
> > > pattern anywhere in the kernel because of this...
> > >  
> > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > +			continue;  
> > > > >
> > > > > Ick.
> > > > >
> > > > > Why isn't your parent pointer a vmbus device all the time?  How  
> > could  
> > > > > you get burried down in the device hierarchy when you are the  
> > driver for  
> > > > > a specific bus type in the first place?  How could this function  
> > ever be  
> > > > > called for a device that is NOT of this type?  
> > > >
> > > > We get notified when state changes on any of the netdev devices in  
> > the system.  
> > > > Not all netdevs in the system belong to vmbus. Consider for instance  
> > the  
> > > > emulated NIC that can be configured. This is an emulated PCI NIC. We  
> > are only  
> > > > interested in netdevs that correspond to the VF instance that we are  
> > interested in.  
> > >
> > > Can you "know" this is your netdev by some other way than having to  
> > walk  
> > > the device tree?  Name?  local device type?  Something else?  This  
> > seems  
> > > like an odd api in that everyone would have to do gyrations like this  
> > in  
> > > order to determine if the netdev is "theirs" or not...  
> > 
> > The scenario is SR-IOV on Hyper-V. In the case of VF device, the host
> > hands the
> > guest OS a PCI device for the virtual function device. The VF device is
> > placed
> > on a special synthetic PCI bus (ie not part of the other buses on the
> > system).
> > The VF device also has a synthetic network interface (netvsc) which
> > lives
> > on VMBUS.  This code is about managing the interaction between the two.
> > 
> > The association between VF and synthetic NIC is done in response to the
> > VF network device being registered. Initial version was based on MAC
> > address
> > which is the same.  Later refinement used permanent MAC address to
> > avoid bugs if MAC address changed.  This version is to use serial number
> > instead which is safer than MAC address.
> > 
> > The code to walk up/down maybe not be needed to find serial number.
> > Perhaps a more direct single set of conditions is possible?
> > 
> > Something like:
> > 
> > In pci-hyperv.c
> > 
> > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int devfn)
> > {
> > 	struct hv_pcibus_device *hbus
> > 		= container_of(bus->sysdata,
> > 			       struct hv_pcibus_device, sysdata);
> > 	struct hf_pci_dev *hpdev;
> > 	u32 serial;
> > 
> > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> > 	if (!hpdev)
> > 		return 0;
> > 
> > 	serial = hpdev->devs.ser;
> > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > 	return serial;
> > }
> > 
> > In netvsc_drv.c
> > 
> > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > {
> > 	struct device *dev = vf_netdev->dev.parent;
> > 	struct pci_dev *pdev;
> > 	u32 wslot;
> > 
> > 	if (!dev || !dev_is_pci(dev))
> > 		return 0;
> > 
> > 	pdev = container_of(dev, struct pci_device, dev);
> > 
> > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > }
> > 
> > 
> > 
> > 
> > 
> > P.S: it would be good to be able to get win_slot out through sysfs as
> > well for systemd/udev.  
> 
> Stephen,
> 
> Thanks for suggestion. Actually, in my earlier implementation of this 
> feature (VF serial based matching), I thought about export a function
> from vPCI driver, then calling it from netvsc. So I don't need to 
> move structs between headers... But, it creates a dependency of netvsc
> on vPCI driver's symbol. So, even if on a VM without SRIOV, we have to
> load vPCI driver, which we don't want.
> 
> Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> Here is the VF drv hierarchy --
> Should we assume it's always 3 parents above vf_netdevice, or search for it?
> 
> [  368.185259] HZINFO:NETDEV_REGISTER:
> [  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null), busName:(null), drvName:(null)
> [  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60, busName:pci, drvName:ixgbevf
> [  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null), busName:(null), drvName:(null)
> [  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160, busName:vmbus, drvName:hv_pci
> [  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800, busName:acpi, drvName:vmbus
> [  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
> [  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
> [  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
> [  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800, busName:acpi, drvName:(null)
> 
> Thanks,
> - Haiyang

Since this is a synthetic bus, the topology should not change unless host side
software changes. The vf_netdev device has to be PCI device, so that is going to
be certain.  After that there maybe intermediate up to hv_pci. The code in hyperv-pci
already has similar stuff (ie for read_config).

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 20:29               ` Stephen Hemminger
@ 2016-12-09 21:31                 ` Haiyang Zhang
  2016-12-09 21:45                   ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-09 21:31 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, December 9, 2016 3:30 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com; jasowang@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 20:09:49 +0000
> Haiyang Zhang <haiyangz@microsoft.com> wrote:
> 
> > > -----Original Message-----
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Friday, December 9, 2016 1:21 PM
> > > To: Greg KH <gregkh@linuxfoundation.org>
> > > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang Zhang
> > > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org;
> > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > >
> > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > Greg KH <gregkh@linuxfoundation.org> wrote:
> > >
> > > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching
> based on
> > > serial
> > > > > > numbers
> > > > > >
> > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> > > kys@exchange.microsoft.com
> > > > > > wrote:
> > > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > >
> > > > > > > We currently use MAC address to match VF and synthetic NICs.
> > > Hyper-V
> > > > > > > provides a serial number to both devices for this purpose.
> This
> > > patch
> > > > > > > implements the matching based on VF serial numbers. This is
> the
> > > way
> > > > > > > specified by the protocol and more reliable.
> > > > > > >
> > > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > > ---
> > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > > > ++++++++++++++++++++++++++++++++++++---
> > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > index 9522763..c5778cf 100644
> > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct
> > > > > > net_device *netdev)
> > > > > > >  	free_netdev(netdev);
> > > > > > >  }
> > > > > > >
> > > > > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > >  {
> > > > > > >  	struct net_device *dev;
> > > > > > > +	struct net_device_context *ndev_ctx;
> > > > > > >
> > > > > > >  	ASSERT_RTNL();
> > > > > > >
> > > > > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct
> > > net_device
> > > > > > *netdev)
> > > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > > >  			continue;	/* not a netvsc device */
> > > > > > >
> > > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > > >  			return dev;
> > > > > > >  	}
> > > > > > >
> > > > > > > @@ -1205,21 +1207,66 @@ static void
> netvsc_free_netdev(struct
> > > > > > net_device *netdev)
> > > > > > >  	return NULL;
> > > > > > >  }
> > > > > > >
> > > > > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > > > +{
> > > > > > > +	struct device *dev;
> > > > > > > +	struct hv_device *hdev;
> > > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > > +	struct list_head *iter;
> > > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > > +	unsigned long flags;
> > > > > > > +	u32 vfser = 0;
> > > > > > > +	u32 count = 0;
> > > > > > > +
> > > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
> > > > > >
> > > > > > You are going to walk the whole device tree backwards?  That's
> > > crazy.
> > > > > > And foolish.  And racy and broken (what happens if the tree
> > > changes
> > > > > > while you do this?)  Where is the lock being grabbed while
> this
> > > happens?
> > > > > > What about reference counts?  Do you see other drivers ever
> doing
> > > this
> > > > > > (if you do, point them out and I'll go yell at them too...)
> > > > >
> > > > > Greg,
> > > > >
> > > > > We are registering for netdev events. Coming into this function,
> the
> > > caller
> > > > > guarantees that the list of netdevs does not change - we assert
> this
> > > on entry:
> > > > > ASSERT_RTNL(). We are only walking up the device tree for the
> > > netdevs whose
> > > > > state change is being notified to us - the device tree being
> walked
> > > here is limited to
> > > > > netdevs under question.
> > > >
> > > > But a netdev is a child of some type of "real" device, and you are
> now
> > > > walking the tree of all devices up to the "root" parent device,
> which
> > > > means you will hit PCI bridges, USB controllers, and all sorts of
> fun
> > > > things if you are a child of those types of devices.
> > > >
> > > > And can't you tell if the netdev for this event, really is "your"
> > > > netdev?  Or are you getting called this for "all" netdevs?  Sorry,
> I
> > > > don't know this api, any pointers to it would be appreciated.
> > > >
> > > > > We have a reference to the device and we know the device is not
> > > going away. Is it not
> > > > > safe to dereference the parent pointer - after all the child has
> > > taken a reference on
> > > > > the parent as part of  device_add() call.
> > > >
> > > > It might be, and might not be.  There's a reason you don't see
> this
> > > > pattern anywhere in the kernel because of this...
> > > >
> > > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > > +			continue;
> > > > > >
> > > > > > Ick.
> > > > > >
> > > > > > Why isn't your parent pointer a vmbus device all the time?
> How
> > > could
> > > > > > you get burried down in the device hierarchy when you are the
> > > driver for
> > > > > > a specific bus type in the first place?  How could this
> function
> > > ever be
> > > > > > called for a device that is NOT of this type?
> > > > >
> > > > > We get notified when state changes on any of the netdev devices
> in
> > > the system.
> > > > > Not all netdevs in the system belong to vmbus. Consider for
> instance
> > > the
> > > > > emulated NIC that can be configured. This is an emulated PCI NIC.
> We
> > > are only
> > > > > interested in netdevs that correspond to the VF instance that we
> are
> > > interested in.
> > > >
> > > > Can you "know" this is your netdev by some other way than having
> to
> > > walk
> > > > the device tree?  Name?  local device type?  Something else?  This
> > > seems
> > > > like an odd api in that everyone would have to do gyrations like
> this
> > > in
> > > > order to determine if the netdev is "theirs" or not...
> > >
> > > The scenario is SR-IOV on Hyper-V. In the case of VF device, the
> host
> > > hands the
> > > guest OS a PCI device for the virtual function device. The VF device
> is
> > > placed
> > > on a special synthetic PCI bus (ie not part of the other buses on
> the
> > > system).
> > > The VF device also has a synthetic network interface (netvsc) which
> > > lives
> > > on VMBUS.  This code is about managing the interaction between the
> two.
> > >
> > > The association between VF and synthetic NIC is done in response to
> the
> > > VF network device being registered. Initial version was based on MAC
> > > address
> > > which is the same.  Later refinement used permanent MAC address to
> > > avoid bugs if MAC address changed.  This version is to use serial
> number
> > > instead which is safer than MAC address.
> > >
> > > The code to walk up/down maybe not be needed to find serial number.
> > > Perhaps a more direct single set of conditions is possible?
> > >
> > > Something like:
> > >
> > > In pci-hyperv.c
> > >
> > > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int devfn)
> > > {
> > > 	struct hv_pcibus_device *hbus
> > > 		= container_of(bus->sysdata,
> > > 			       struct hv_pcibus_device, sysdata);
> > > 	struct hf_pci_dev *hpdev;
> > > 	u32 serial;
> > >
> > > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> > > 	if (!hpdev)
> > > 		return 0;
> > >
> > > 	serial = hpdev->devs.ser;
> > > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > > 	return serial;
> > > }
> > >
> > > In netvsc_drv.c
> > >
> > > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > {
> > > 	struct device *dev = vf_netdev->dev.parent;
> > > 	struct pci_dev *pdev;
> > > 	u32 wslot;
> > >
> > > 	if (!dev || !dev_is_pci(dev))
> > > 		return 0;
> > >
> > > 	pdev = container_of(dev, struct pci_device, dev);
> > >
> > > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > > }
> > >
> > >
> > >
> > >
> > >
> > > P.S: it would be good to be able to get win_slot out through sysfs
> as
> > > well for systemd/udev.
> >
> > Stephen,
> >
> > Thanks for suggestion. Actually, in my earlier implementation of this
> > feature (VF serial based matching), I thought about export a function
> > from vPCI driver, then calling it from netvsc. So I don't need to
> > move structs between headers... But, it creates a dependency of netvsc
> > on vPCI driver's symbol. So, even if on a VM without SRIOV, we have to
> > load vPCI driver, which we don't want.
> >
> > Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> > Here is the VF drv hierarchy --
> > Should we assume it's always 3 parents above vf_netdevice, or search
> for it?
> >
> > [  368.185259] HZINFO:NETDEV_REGISTER:
> > [  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null),
> busName:(null), drvName:(null)
> > [  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60,
> busName:pci, drvName:ixgbevf
> > [  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null),
> busName:(null), drvName:(null)
> > [  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160,
> busName:vmbus, drvName:hv_pci
> > [  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800,
> busName:acpi, drvName:vmbus
> > [  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800,
> busName:acpi, drvName:(null)
> > [  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800,
> busName:acpi, drvName:(null)
> > [  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800,
> busName:acpi, drvName:(null)
> > [  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800,
> busName:acpi, drvName:(null)
> >
> > Thanks,
> > - Haiyang
> 
> Since this is a synthetic bus, the topology should not change unless
> host side
> software changes. The vf_netdev device has to be PCI device, so that is
> going to
> be certain.  After that there maybe intermediate up to hv_pci. The code
> in hyperv-pci
> already has similar stuff (ie for read_config).

Other netdevice, like emulated NIC can also trigger this notification.
They are not vPCI.

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 21:31                 ` Haiyang Zhang
@ 2016-12-09 21:45                   ` Stephen Hemminger
  2016-12-09 21:53                     ` Haiyang Zhang
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-09 21:45 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang

On Fri, 9 Dec 2016 21:31:25 +0000
Haiyang Zhang <haiyangz@microsoft.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Friday, December 9, 2016 3:30 PM
> > To: Haiyang Zhang <haiyangz@microsoft.com>
> > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > leann.ogasawara@canonical.com; jasowang@redhat.com
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> > 
> > On Fri, 9 Dec 2016 20:09:49 +0000
> > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> >   
> > > > -----Original Message-----
> > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > To: Greg KH <gregkh@linuxfoundation.org>
> > > > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang Zhang
> > > > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > > > bjorn.helgaas@gmail.com; apw@canonical.com;  
> > devel@linuxdriverproject.org;  
> > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > > serial numbers
> > > >
> > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > Greg KH <gregkh@linuxfoundation.org> wrote:
> > > >  
> > > > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:  
> > > > > >
> > > > > >  
> > > > > > > -----Original Message-----
> > > > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > > > Cc: linux-kernel@vger.kernel.org; devel@linuxdriverproject.org;
> > > > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > > > bjorn.helgaas@gmail.com; Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching  
> > based on  
> > > > serial  
> > > > > > > numbers
> > > > > > >
> > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,  
> > > > kys@exchange.microsoft.com  
> > > > > > > wrote:  
> > > > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > >
> > > > > > > > We currently use MAC address to match VF and synthetic NICs.  
> > > > Hyper-V  
> > > > > > > > provides a serial number to both devices for this purpose.  
> > This  
> > > > patch  
> > > > > > > > implements the matching based on VF serial numbers. This is  
> > the  
> > > > way  
> > > > > > > > specified by the protocol and more reliable.
> > > > > > > >
> > > > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > > > ---
> > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55  
> > > > > > > ++++++++++++++++++++++++++++++++++++---  
> > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > b/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct  
> > > > > > > net_device *netdev)  
> > > > > > > >  	free_netdev(netdev);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > > >  {
> > > > > > > >  	struct net_device *dev;
> > > > > > > > +	struct net_device_context *ndev_ctx;
> > > > > > > >
> > > > > > > >  	ASSERT_RTNL();
> > > > > > > >
> > > > > > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct  
> > > > net_device  
> > > > > > > *netdev)  
> > > > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > > > >  			continue;	/* not a netvsc device */
> > > > > > > >
> > > > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > > > >  			return dev;
> > > > > > > >  	}
> > > > > > > >
> > > > > > > > @@ -1205,21 +1207,66 @@ static void  
> > netvsc_free_netdev(struct  
> > > > > > > net_device *netdev)  
> > > > > > > >  	return NULL;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > > > > +{
> > > > > > > > +	struct device *dev;
> > > > > > > > +	struct hv_device *hdev;
> > > > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > > > +	struct list_head *iter;
> > > > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > > > +	unsigned long flags;
> > > > > > > > +	u32 vfser = 0;
> > > > > > > > +	u32 count = 0;
> > > > > > > > +
> > > > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent) {  
> > > > > > >
> > > > > > > You are going to walk the whole device tree backwards?  That's  
> > > > crazy.  
> > > > > > > And foolish.  And racy and broken (what happens if the tree  
> > > > changes  
> > > > > > > while you do this?)  Where is the lock being grabbed while  
> > this  
> > > > happens?  
> > > > > > > What about reference counts?  Do you see other drivers ever  
> > doing  
> > > > this  
> > > > > > > (if you do, point them out and I'll go yell at them too...)  
> > > > > >
> > > > > > Greg,
> > > > > >
> > > > > > We are registering for netdev events. Coming into this function,  
> > the  
> > > > caller  
> > > > > > guarantees that the list of netdevs does not change - we assert  
> > this  
> > > > on entry:  
> > > > > > ASSERT_RTNL(). We are only walking up the device tree for the  
> > > > netdevs whose  
> > > > > > state change is being notified to us - the device tree being  
> > walked  
> > > > here is limited to  
> > > > > > netdevs under question.  
> > > > >
> > > > > But a netdev is a child of some type of "real" device, and you are  
> > now  
> > > > > walking the tree of all devices up to the "root" parent device,  
> > which  
> > > > > means you will hit PCI bridges, USB controllers, and all sorts of  
> > fun  
> > > > > things if you are a child of those types of devices.
> > > > >
> > > > > And can't you tell if the netdev for this event, really is "your"
> > > > > netdev?  Or are you getting called this for "all" netdevs?  Sorry,  
> > I  
> > > > > don't know this api, any pointers to it would be appreciated.
> > > > >  
> > > > > > We have a reference to the device and we know the device is not  
> > > > going away. Is it not  
> > > > > > safe to dereference the parent pointer - after all the child has  
> > > > taken a reference on  
> > > > > > the parent as part of  device_add() call.  
> > > > >
> > > > > It might be, and might not be.  There's a reason you don't see  
> > this  
> > > > > pattern anywhere in the kernel because of this...
> > > > >  
> > > > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > > > +			continue;  
> > > > > > >
> > > > > > > Ick.
> > > > > > >
> > > > > > > Why isn't your parent pointer a vmbus device all the time?  
> > How  
> > > > could  
> > > > > > > you get burried down in the device hierarchy when you are the  
> > > > driver for  
> > > > > > > a specific bus type in the first place?  How could this  
> > function  
> > > > ever be  
> > > > > > > called for a device that is NOT of this type?  
> > > > > >
> > > > > > We get notified when state changes on any of the netdev devices  
> > in  
> > > > the system.  
> > > > > > Not all netdevs in the system belong to vmbus. Consider for  
> > instance  
> > > > the  
> > > > > > emulated NIC that can be configured. This is an emulated PCI NIC.  
> > We  
> > > > are only  
> > > > > > interested in netdevs that correspond to the VF instance that we  
> > are  
> > > > interested in.  
> > > > >
> > > > > Can you "know" this is your netdev by some other way than having  
> > to  
> > > > walk  
> > > > > the device tree?  Name?  local device type?  Something else?  This  
> > > > seems  
> > > > > like an odd api in that everyone would have to do gyrations like  
> > this  
> > > > in  
> > > > > order to determine if the netdev is "theirs" or not...  
> > > >
> > > > The scenario is SR-IOV on Hyper-V. In the case of VF device, the  
> > host  
> > > > hands the
> > > > guest OS a PCI device for the virtual function device. The VF device  
> > is  
> > > > placed
> > > > on a special synthetic PCI bus (ie not part of the other buses on  
> > the  
> > > > system).
> > > > The VF device also has a synthetic network interface (netvsc) which
> > > > lives
> > > > on VMBUS.  This code is about managing the interaction between the  
> > two.  
> > > >
> > > > The association between VF and synthetic NIC is done in response to  
> > the  
> > > > VF network device being registered. Initial version was based on MAC
> > > > address
> > > > which is the same.  Later refinement used permanent MAC address to
> > > > avoid bugs if MAC address changed.  This version is to use serial  
> > number  
> > > > instead which is safer than MAC address.
> > > >
> > > > The code to walk up/down maybe not be needed to find serial number.
> > > > Perhaps a more direct single set of conditions is possible?
> > > >
> > > > Something like:
> > > >
> > > > In pci-hyperv.c
> > > >
> > > > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int devfn)
> > > > {
> > > > 	struct hv_pcibus_device *hbus
> > > > 		= container_of(bus->sysdata,
> > > > 			       struct hv_pcibus_device, sysdata);
> > > > 	struct hf_pci_dev *hpdev;
> > > > 	u32 serial;
> > > >
> > > > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> > > > 	if (!hpdev)
> > > > 		return 0;
> > > >
> > > > 	serial = hpdev->devs.ser;
> > > > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > > > 	return serial;
> > > > }
> > > >
> > > > In netvsc_drv.c
> > > >
> > > > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > {
> > > > 	struct device *dev = vf_netdev->dev.parent;
> > > > 	struct pci_dev *pdev;
> > > > 	u32 wslot;
> > > >
> > > > 	if (!dev || !dev_is_pci(dev))
> > > > 		return 0;
> > > >
> > > > 	pdev = container_of(dev, struct pci_device, dev);
> > > >
> > > > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > > > }
> > > >
> > > >
> > > >
> > > >
> > > >
> > > > P.S: it would be good to be able to get win_slot out through sysfs  
> > as  
> > > > well for systemd/udev.  
> > >
> > > Stephen,
> > >
> > > Thanks for suggestion. Actually, in my earlier implementation of this
> > > feature (VF serial based matching), I thought about export a function
> > > from vPCI driver, then calling it from netvsc. So I don't need to
> > > move structs between headers... But, it creates a dependency of netvsc
> > > on vPCI driver's symbol. So, even if on a VM without SRIOV, we have to
> > > load vPCI driver, which we don't want.
> > >
> > > Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> > > Here is the VF drv hierarchy --
> > > Should we assume it's always 3 parents above vf_netdevice, or search  
> > for it?  
> > >
> > > [  368.185259] HZINFO:NETDEV_REGISTER:
> > > [  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null),  
> > busName:(null), drvName:(null)  
> > > [  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60,  
> > busName:pci, drvName:ixgbevf  
> > > [  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null),  
> > busName:(null), drvName:(null)  
> > > [  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160,  
> > busName:vmbus, drvName:hv_pci  
> > > [  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800,  
> > busName:acpi, drvName:vmbus  
> > > [  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800,  
> > busName:acpi, drvName:(null)  
> > > [  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800,  
> > busName:acpi, drvName:(null)  
> > > [  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800,  
> > busName:acpi, drvName:(null)  
> > > [  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800,  
> > busName:acpi, drvName:(null)  
> > >
> > > Thanks,
> > > - Haiyang  
> > 
> > Since this is a synthetic bus, the topology should not change unless
> > host side
> > software changes. The vf_netdev device has to be PCI device, so that is
> > going to
> > be certain.  After that there maybe intermediate up to hv_pci. The code
> > in hyperv-pci
> > already has similar stuff (ie for read_config).  
> 
> Other netdevice, like emulated NIC can also trigger this notification.
> They are not vPCI.
> 
> Thanks,
> - Haiyang

Emulated NIC is already excluded in start of netvc notifier handler.

static int netvsc_netdev_event(struct notifier_block *this,
			       unsigned long event, void *ptr)
{
	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);

	/* Skip our own events */
	if (event_dev->netdev_ops == &device_ops)
		return NOTIFY_DONE;

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 21:45                   ` Stephen Hemminger
@ 2016-12-09 21:53                     ` Haiyang Zhang
  2016-12-09 22:05                       ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-09 21:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, December 9, 2016 4:45 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com; jasowang@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 21:31:25 +0000
> Haiyang Zhang <haiyangz@microsoft.com> wrote:
> 
> > > -----Original Message-----
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Friday, December 9, 2016 3:30 PM
> > > To: Haiyang Zhang <haiyangz@microsoft.com>
> > > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > > <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org;
> > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > >
> > > On Fri, 9 Dec 2016 20:09:49 +0000
> > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > >
> > > > > -----Original Message-----
> > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > To: Greg KH <gregkh@linuxfoundation.org>
> > > > > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang
> Zhang
> > > > > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > > > > bjorn.helgaas@gmail.com; apw@canonical.com;
> > > devel@linuxdriverproject.org;
> > > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based
> on
> > > > > serial numbers
> > > > >
> > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > Greg KH <gregkh@linuxfoundation.org> wrote:
> > > > >
> > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:
> > > > > > >
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > > > > Cc: linux-kernel@vger.kernel.org;
> devel@linuxdriverproject.org;
> > > > > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > > > > bjorn.helgaas@gmail.com; Haiyang Zhang
> <haiyangz@microsoft.com>
> > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching
> > > based on
> > > > > serial
> > > > > > > > numbers
> > > > > > > >
> > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> > > > > kys@exchange.microsoft.com
> > > > > > > > wrote:
> > > > > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > >
> > > > > > > > > We currently use MAC address to match VF and synthetic
> NICs.
> > > > > Hyper-V
> > > > > > > > > provides a serial number to both devices for this
> purpose.
> > > This
> > > > > patch
> > > > > > > > > implements the matching based on VF serial numbers. This
> is
> > > the
> > > > > way
> > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > > > > > ++++++++++++++++++++++++++++++++++++---
> > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > @@ -1165,9 +1165,10 @@ static void
> netvsc_free_netdev(struct
> > > > > > > > net_device *netdev)
> > > > > > > > >  	free_netdev(netdev);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8
> *mac)
> > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > > > >  {
> > > > > > > > >  	struct net_device *dev;
> > > > > > > > > +	struct net_device_context *ndev_ctx;
> > > > > > > > >
> > > > > > > > >  	ASSERT_RTNL();
> > > > > > > > >
> > > > > > > > > @@ -1175,7 +1176,8 @@ static void
> netvsc_free_netdev(struct
> > > > > net_device
> > > > > > > > *netdev)
> > > > > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > > > > >  			continue;	/* not a netvsc device */
> > > > > > > > >
> > > > > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > > > > >  			return dev;
> > > > > > > > >  	}
> > > > > > > > >
> > > > > > > > > @@ -1205,21 +1207,66 @@ static void
> > > netvsc_free_netdev(struct
> > > > > > > > net_device *netdev)
> > > > > > > > >  	return NULL;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static u32 netvsc_get_vfser(struct net_device
> *vf_netdev)
> > > > > > > > > +{
> > > > > > > > > +	struct device *dev;
> > > > > > > > > +	struct hv_device *hdev;
> > > > > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > > > > +	struct list_head *iter;
> > > > > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > > > > +	unsigned long flags;
> > > > > > > > > +	u32 vfser = 0;
> > > > > > > > > +	u32 count = 0;
> > > > > > > > > +
> > > > > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent)
> {
> > > > > > > >
> > > > > > > > You are going to walk the whole device tree backwards?
> That's
> > > > > crazy.
> > > > > > > > And foolish.  And racy and broken (what happens if the
> tree
> > > > > changes
> > > > > > > > while you do this?)  Where is the lock being grabbed while
> > > this
> > > > > happens?
> > > > > > > > What about reference counts?  Do you see other drivers
> ever
> > > doing
> > > > > this
> > > > > > > > (if you do, point them out and I'll go yell at them too...)
> > > > > > >
> > > > > > > Greg,
> > > > > > >
> > > > > > > We are registering for netdev events. Coming into this
> function,
> > > the
> > > > > caller
> > > > > > > guarantees that the list of netdevs does not change - we
> assert
> > > this
> > > > > on entry:
> > > > > > > ASSERT_RTNL(). We are only walking up the device tree for
> the
> > > > > netdevs whose
> > > > > > > state change is being notified to us - the device tree being
> > > walked
> > > > > here is limited to
> > > > > > > netdevs under question.
> > > > > >
> > > > > > But a netdev is a child of some type of "real" device, and you
> are
> > > now
> > > > > > walking the tree of all devices up to the "root" parent device,
> > > which
> > > > > > means you will hit PCI bridges, USB controllers, and all sorts
> of
> > > fun
> > > > > > things if you are a child of those types of devices.
> > > > > >
> > > > > > And can't you tell if the netdev for this event, really is
> "your"
> > > > > > netdev?  Or are you getting called this for "all" netdevs?
> Sorry,
> > > I
> > > > > > don't know this api, any pointers to it would be appreciated.
> > > > > >
> > > > > > > We have a reference to the device and we know the device is
> not
> > > > > going away. Is it not
> > > > > > > safe to dereference the parent pointer - after all the child
> has
> > > > > taken a reference on
> > > > > > > the parent as part of  device_add() call.
> > > > > >
> > > > > > It might be, and might not be.  There's a reason you don't see
> > > this
> > > > > > pattern anywhere in the kernel because of this...
> > > > > >
> > > > > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > > > > +			continue;
> > > > > > > >
> > > > > > > > Ick.
> > > > > > > >
> > > > > > > > Why isn't your parent pointer a vmbus device all the time?
> > > How
> > > > > could
> > > > > > > > you get burried down in the device hierarchy when you are
> the
> > > > > driver for
> > > > > > > > a specific bus type in the first place?  How could this
> > > function
> > > > > ever be
> > > > > > > > called for a device that is NOT of this type?
> > > > > > >
> > > > > > > We get notified when state changes on any of the netdev
> devices
> > > in
> > > > > the system.
> > > > > > > Not all netdevs in the system belong to vmbus. Consider for
> > > instance
> > > > > the
> > > > > > > emulated NIC that can be configured. This is an emulated PCI
> NIC.
> > > We
> > > > > are only
> > > > > > > interested in netdevs that correspond to the VF instance
> that we
> > > are
> > > > > interested in.
> > > > > >
> > > > > > Can you "know" this is your netdev by some other way than
> having
> > > to
> > > > > walk
> > > > > > the device tree?  Name?  local device type?  Something else?
> This
> > > > > seems
> > > > > > like an odd api in that everyone would have to do gyrations
> like
> > > this
> > > > > in
> > > > > > order to determine if the netdev is "theirs" or not...
> > > > >
> > > > > The scenario is SR-IOV on Hyper-V. In the case of VF device, the
> > > host
> > > > > hands the
> > > > > guest OS a PCI device for the virtual function device. The VF
> device
> > > is
> > > > > placed
> > > > > on a special synthetic PCI bus (ie not part of the other buses
> on
> > > the
> > > > > system).
> > > > > The VF device also has a synthetic network interface (netvsc)
> which
> > > > > lives
> > > > > on VMBUS.  This code is about managing the interaction between
> the
> > > two.
> > > > >
> > > > > The association between VF and synthetic NIC is done in response
> to
> > > the
> > > > > VF network device being registered. Initial version was based on
> MAC
> > > > > address
> > > > > which is the same.  Later refinement used permanent MAC address
> to
> > > > > avoid bugs if MAC address changed.  This version is to use
> serial
> > > number
> > > > > instead which is safer than MAC address.
> > > > >
> > > > > The code to walk up/down maybe not be needed to find serial
> number.
> > > > > Perhaps a more direct single set of conditions is possible?
> > > > >
> > > > > Something like:
> > > > >
> > > > > In pci-hyperv.c
> > > > >
> > > > > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int
> devfn)
> > > > > {
> > > > > 	struct hv_pcibus_device *hbus
> > > > > 		= container_of(bus->sysdata,
> > > > > 			       struct hv_pcibus_device, sysdata);
> > > > > 	struct hf_pci_dev *hpdev;
> > > > > 	u32 serial;
> > > > >
> > > > > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> > > > > 	if (!hpdev)
> > > > > 		return 0;
> > > > >
> > > > > 	serial = hpdev->devs.ser;
> > > > > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > > > > 	return serial;
> > > > > }
> > > > >
> > > > > In netvsc_drv.c
> > > > >
> > > > > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > {
> > > > > 	struct device *dev = vf_netdev->dev.parent;
> > > > > 	struct pci_dev *pdev;
> > > > > 	u32 wslot;
> > > > >
> > > > > 	if (!dev || !dev_is_pci(dev))
> > > > > 		return 0;
> > > > >
> > > > > 	pdev = container_of(dev, struct pci_device, dev);
> > > > >
> > > > > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > > > > }
> > > > >
> > > > >
> > > > >
> > > > >
> > > > >
> > > > > P.S: it would be good to be able to get win_slot out through
> sysfs
> > > as
> > > > > well for systemd/udev.
> > > >
> > > > Stephen,
> > > >
> > > > Thanks for suggestion. Actually, in my earlier implementation of
> this
> > > > feature (VF serial based matching), I thought about export a
> function
> > > > from vPCI driver, then calling it from netvsc. So I don't need to
> > > > move structs between headers... But, it creates a dependency of
> netvsc
> > > > on vPCI driver's symbol. So, even if on a VM without SRIOV, we
> have to
> > > > load vPCI driver, which we don't want.
> > > >
> > > > Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> > > > Here is the VF drv hierarchy --
> > > > Should we assume it's always 3 parents above vf_netdevice, or
> search
> > > for it?
> > > >
> > > > [  368.185259] HZINFO:NETDEV_REGISTER:
> > > > [  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null),
> > > busName:(null), drvName:(null)
> > > > [  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60,
> > > busName:pci, drvName:ixgbevf
> > > > [  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null),
> > > busName:(null), drvName:(null)
> > > > [  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160,
> > > busName:vmbus, drvName:hv_pci
> > > > [  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800,
> > > busName:acpi, drvName:vmbus
> > > > [  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800,
> > > busName:acpi, drvName:(null)
> > > > [  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800,
> > > busName:acpi, drvName:(null)
> > > > [  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800,
> > > busName:acpi, drvName:(null)
> > > > [  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800,
> > > busName:acpi, drvName:(null)
> > > >
> > > > Thanks,
> > > > - Haiyang
> > >
> > > Since this is a synthetic bus, the topology should not change unless
> > > host side
> > > software changes. The vf_netdev device has to be PCI device, so that
> is
> > > going to
> > > be certain.  After that there maybe intermediate up to hv_pci. The
> code
> > > in hyperv-pci
> > > already has similar stuff (ie for read_config).
> >
> > Other netdevice, like emulated NIC can also trigger this notification.
> > They are not vPCI.
> >
> > Thanks,
> > - Haiyang
> 
> Emulated NIC is already excluded in start of netvc notifier handler.
> 
> static int netvsc_netdev_event(struct notifier_block *this,
> 			       unsigned long event, void *ptr)
> {
> 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> 
> 	/* Skip our own events */
> 	if (event_dev->netdev_ops == &device_ops)
> 		return NOTIFY_DONE;
> 

Emulated device is not based on netvsc. It's the native Linux (dec100M?)
Driver. So this line doesn't exclude it. And how about other NIC type 
may be added in the future?

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 21:53                     ` Haiyang Zhang
@ 2016-12-09 22:05                       ` Stephen Hemminger
  2016-12-09 22:35                         ` Haiyang Zhang
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-09 22:05 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang

On Fri, 9 Dec 2016 21:53:49 +0000
Haiyang Zhang <haiyangz@microsoft.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > Sent: Friday, December 9, 2016 4:45 PM
> > To: Haiyang Zhang <haiyangz@microsoft.com>
> > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > leann.ogasawara@canonical.com; jasowang@redhat.com
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> > 
> > On Fri, 9 Dec 2016 21:31:25 +0000
> > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> >   
> > > > -----Original Message-----
> > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > Sent: Friday, December 9, 2016 3:30 PM
> > > > To: Haiyang Zhang <haiyangz@microsoft.com>
> > > > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > > > <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > > > bjorn.helgaas@gmail.com; apw@canonical.com;  
> > devel@linuxdriverproject.org;  
> > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > > serial numbers
> > > >
> > > > On Fri, 9 Dec 2016 20:09:49 +0000
> > > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > > >  
> > > > > > -----Original Message-----
> > > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > > To: Greg KH <gregkh@linuxfoundation.org>
> > > > > > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de; Haiyang  
> > Zhang  
> > > > > > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > > > > > bjorn.helgaas@gmail.com; apw@canonical.com;  
> > > > devel@linuxdriverproject.org;  
> > > > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based  
> > on  
> > > > > > serial numbers
> > > > > >
> > > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > > Greg KH <gregkh@linuxfoundation.org> wrote:
> > > > > >  
> > > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan wrote:  
> > > > > > > >
> > > > > > > >  
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > > > > > Cc: linux-kernel@vger.kernel.org;  
> > devel@linuxdriverproject.org;  
> > > > > > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > > > > > bjorn.helgaas@gmail.com; Haiyang Zhang  
> > <haiyangz@microsoft.com>  
> > > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching  
> > > > based on  
> > > > > > serial  
> > > > > > > > > numbers
> > > > > > > > >
> > > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,  
> > > > > > kys@exchange.microsoft.com  
> > > > > > > > > wrote:  
> > > > > > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > > >
> > > > > > > > > > We currently use MAC address to match VF and synthetic  
> > NICs.  
> > > > > > Hyper-V  
> > > > > > > > > > provides a serial number to both devices for this  
> > purpose.  
> > > > This  
> > > > > > patch  
> > > > > > > > > > implements the matching based on VF serial numbers. This  
> > is  
> > > > the  
> > > > > > way  
> > > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55  
> > > > > > > > > ++++++++++++++++++++++++++++++++++++---  
> > > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > > > b/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > @@ -1165,9 +1165,10 @@ static void  
> > netvsc_free_netdev(struct  
> > > > > > > > > net_device *netdev)  
> > > > > > > > > >  	free_netdev(netdev);
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8  
> > *mac)  
> > > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > > > > >  {
> > > > > > > > > >  	struct net_device *dev;
> > > > > > > > > > +	struct net_device_context *ndev_ctx;
> > > > > > > > > >
> > > > > > > > > >  	ASSERT_RTNL();
> > > > > > > > > >
> > > > > > > > > > @@ -1175,7 +1176,8 @@ static void  
> > netvsc_free_netdev(struct  
> > > > > > net_device  
> > > > > > > > > *netdev)  
> > > > > > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > > > > > >  			continue;	/* not a netvsc device */
> > > > > > > > > >
> > > > > > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > > > > > >  			return dev;
> > > > > > > > > >  	}
> > > > > > > > > >
> > > > > > > > > > @@ -1205,21 +1207,66 @@ static void  
> > > > netvsc_free_netdev(struct  
> > > > > > > > > net_device *netdev)  
> > > > > > > > > >  	return NULL;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static u32 netvsc_get_vfser(struct net_device  
> > *vf_netdev)  
> > > > > > > > > > +{
> > > > > > > > > > +	struct device *dev;
> > > > > > > > > > +	struct hv_device *hdev;
> > > > > > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > > > > > +	struct list_head *iter;
> > > > > > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > > > > > +	unsigned long flags;
> > > > > > > > > > +	u32 vfser = 0;
> > > > > > > > > > +	u32 count = 0;
> > > > > > > > > > +
> > > > > > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent)  
> > {  
> > > > > > > > >
> > > > > > > > > You are going to walk the whole device tree backwards?  
> > That's  
> > > > > > crazy.  
> > > > > > > > > And foolish.  And racy and broken (what happens if the  
> > tree  
> > > > > > changes  
> > > > > > > > > while you do this?)  Where is the lock being grabbed while  
> > > > this  
> > > > > > happens?  
> > > > > > > > > What about reference counts?  Do you see other drivers  
> > ever  
> > > > doing  
> > > > > > this  
> > > > > > > > > (if you do, point them out and I'll go yell at them too...)  
> > > > > > > >
> > > > > > > > Greg,
> > > > > > > >
> > > > > > > > We are registering for netdev events. Coming into this  
> > function,  
> > > > the  
> > > > > > caller  
> > > > > > > > guarantees that the list of netdevs does not change - we  
> > assert  
> > > > this  
> > > > > > on entry:  
> > > > > > > > ASSERT_RTNL(). We are only walking up the device tree for  
> > the  
> > > > > > netdevs whose  
> > > > > > > > state change is being notified to us - the device tree being  
> > > > walked  
> > > > > > here is limited to  
> > > > > > > > netdevs under question.  
> > > > > > >
> > > > > > > But a netdev is a child of some type of "real" device, and you  
> > are  
> > > > now  
> > > > > > > walking the tree of all devices up to the "root" parent device,  
> > > > which  
> > > > > > > means you will hit PCI bridges, USB controllers, and all sorts  
> > of  
> > > > fun  
> > > > > > > things if you are a child of those types of devices.
> > > > > > >
> > > > > > > And can't you tell if the netdev for this event, really is  
> > "your"  
> > > > > > > netdev?  Or are you getting called this for "all" netdevs?  
> > Sorry,  
> > > > I  
> > > > > > > don't know this api, any pointers to it would be appreciated.
> > > > > > >  
> > > > > > > > We have a reference to the device and we know the device is  
> > not  
> > > > > > going away. Is it not  
> > > > > > > > safe to dereference the parent pointer - after all the child  
> > has  
> > > > > > taken a reference on  
> > > > > > > > the parent as part of  device_add() call.  
> > > > > > >
> > > > > > > It might be, and might not be.  There's a reason you don't see  
> > > > this  
> > > > > > > pattern anywhere in the kernel because of this...
> > > > > > >  
> > > > > > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > > > > > +			continue;  
> > > > > > > > >
> > > > > > > > > Ick.
> > > > > > > > >
> > > > > > > > > Why isn't your parent pointer a vmbus device all the time?  
> > > > How  
> > > > > > could  
> > > > > > > > > you get burried down in the device hierarchy when you are  
> > the  
> > > > > > driver for  
> > > > > > > > > a specific bus type in the first place?  How could this  
> > > > function  
> > > > > > ever be  
> > > > > > > > > called for a device that is NOT of this type?  
> > > > > > > >
> > > > > > > > We get notified when state changes on any of the netdev  
> > devices  
> > > > in  
> > > > > > the system.  
> > > > > > > > Not all netdevs in the system belong to vmbus. Consider for  
> > > > instance  
> > > > > > the  
> > > > > > > > emulated NIC that can be configured. This is an emulated PCI  
> > NIC.  
> > > > We  
> > > > > > are only  
> > > > > > > > interested in netdevs that correspond to the VF instance  
> > that we  
> > > > are  
> > > > > > interested in.  
> > > > > > >
> > > > > > > Can you "know" this is your netdev by some other way than  
> > having  
> > > > to  
> > > > > > walk  
> > > > > > > the device tree?  Name?  local device type?  Something else?  
> > This  
> > > > > > seems  
> > > > > > > like an odd api in that everyone would have to do gyrations  
> > like  
> > > > this  
> > > > > > in  
> > > > > > > order to determine if the netdev is "theirs" or not...  
> > > > > >
> > > > > > The scenario is SR-IOV on Hyper-V. In the case of VF device, the  
> > > > host  
> > > > > > hands the
> > > > > > guest OS a PCI device for the virtual function device. The VF  
> > device  
> > > > is  
> > > > > > placed
> > > > > > on a special synthetic PCI bus (ie not part of the other buses  
> > on  
> > > > the  
> > > > > > system).
> > > > > > The VF device also has a synthetic network interface (netvsc)  
> > which  
> > > > > > lives
> > > > > > on VMBUS.  This code is about managing the interaction between  
> > the  
> > > > two.  
> > > > > >
> > > > > > The association between VF and synthetic NIC is done in response  
> > to  
> > > > the  
> > > > > > VF network device being registered. Initial version was based on  
> > MAC  
> > > > > > address
> > > > > > which is the same.  Later refinement used permanent MAC address  
> > to  
> > > > > > avoid bugs if MAC address changed.  This version is to use  
> > serial  
> > > > number  
> > > > > > instead which is safer than MAC address.
> > > > > >
> > > > > > The code to walk up/down maybe not be needed to find serial  
> > number.  
> > > > > > Perhaps a more direct single set of conditions is possible?
> > > > > >
> > > > > > Something like:
> > > > > >
> > > > > > In pci-hyperv.c
> > > > > >
> > > > > > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int  
> > devfn)  
> > > > > > {
> > > > > > 	struct hv_pcibus_device *hbus
> > > > > > 		= container_of(bus->sysdata,
> > > > > > 			       struct hv_pcibus_device, sysdata);
> > > > > > 	struct hf_pci_dev *hpdev;
> > > > > > 	u32 serial;
> > > > > >
> > > > > > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
> > > > > > 	if (!hpdev)
> > > > > > 		return 0;
> > > > > >
> > > > > > 	serial = hpdev->devs.ser;
> > > > > > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > > > > > 	return serial;
> > > > > > }
> > > > > >
> > > > > > In netvsc_drv.c
> > > > > >
> > > > > > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > > {
> > > > > > 	struct device *dev = vf_netdev->dev.parent;
> > > > > > 	struct pci_dev *pdev;
> > > > > > 	u32 wslot;
> > > > > >
> > > > > > 	if (!dev || !dev_is_pci(dev))
> > > > > > 		return 0;
> > > > > >
> > > > > > 	pdev = container_of(dev, struct pci_device, dev);
> > > > > >
> > > > > > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > > > > > }
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > P.S: it would be good to be able to get win_slot out through  
> > sysfs  
> > > > as  
> > > > > > well for systemd/udev.  
> > > > >
> > > > > Stephen,
> > > > >
> > > > > Thanks for suggestion. Actually, in my earlier implementation of  
> > this  
> > > > > feature (VF serial based matching), I thought about export a  
> > function  
> > > > > from vPCI driver, then calling it from netvsc. So I don't need to
> > > > > move structs between headers... But, it creates a dependency of  
> > netvsc  
> > > > > on vPCI driver's symbol. So, even if on a VM without SRIOV, we  
> > have to  
> > > > > load vPCI driver, which we don't want.
> > > > >
> > > > > Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> > > > > Here is the VF drv hierarchy --
> > > > > Should we assume it's always 3 parents above vf_netdevice, or  
> > search  
> > > > for it?  
> > > > >
> > > > > [  368.185259] HZINFO:NETDEV_REGISTER:
> > > > > [  368.185261] HZINFO: dev:ffff88007c10d518, bus:          (null),  
> > > > busName:(null), drvName:(null)  
> > > > > [  368.185262] HZINFO: dev:ffff88007c10c0a0, bus:ffffffff81ce4b60,  
> > > > busName:pci, drvName:ixgbevf  
> > > > > [  368.185263] HZINFO: dev:ffff8800355c0000, bus:          (null),  
> > > > busName:(null), drvName:(null)  
> > > > > [  368.185264] HZINFO: dev:ffff8800355c5428, bus:ffffffffa0008160,  
> > > > busName:vmbus, drvName:hv_pci  
> > > > > [  368.185264] HZINFO: dev:ffff88007c49e268, bus:ffffffff81ce9800,  
> > > > busName:acpi, drvName:vmbus  
> > > > > [  368.185265] HZINFO: dev:ffff88007c48ea68, bus:ffffffff81ce9800,  
> > > > busName:acpi, drvName:(null)  
> > > > > [  368.185266] HZINFO: dev:ffff88007c48aa68, bus:ffffffff81ce9800,  
> > > > busName:acpi, drvName:(null)  
> > > > > [  368.185266] HZINFO: dev:ffff88007c48a268, bus:ffffffff81ce9800,  
> > > > busName:acpi, drvName:(null)  
> > > > > [  368.185267] HZINFO: dev:ffff88007c489a68, bus:ffffffff81ce9800,  
> > > > busName:acpi, drvName:(null)  
> > > > >
> > > > > Thanks,
> > > > > - Haiyang  
> > > >
> > > > Since this is a synthetic bus, the topology should not change unless
> > > > host side
> > > > software changes. The vf_netdev device has to be PCI device, so that  
> > is  
> > > > going to
> > > > be certain.  After that there maybe intermediate up to hv_pci. The  
> > code  
> > > > in hyperv-pci
> > > > already has similar stuff (ie for read_config).  
> > >
> > > Other netdevice, like emulated NIC can also trigger this notification.
> > > They are not vPCI.
> > >
> > > Thanks,
> > > - Haiyang  
> > 
> > Emulated NIC is already excluded in start of netvc notifier handler.
> > 
> > static int netvsc_netdev_event(struct notifier_block *this,
> > 			       unsigned long event, void *ptr)
> > {
> > 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > 
> > 	/* Skip our own events */
> > 	if (event_dev->netdev_ops == &device_ops)
> > 		return NOTIFY_DONE;
> >   
> 
> Emulated device is not based on netvsc. It's the native Linux (dec100M?)
> Driver. So this line doesn't exclude it. And how about other NIC type 
> may be added in the future?

Sorry, forgot about that haven't used emulated device in years.
The emulated device should appear to be on a PCI bus, but the serial
would not match??

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 22:05                       ` Stephen Hemminger
@ 2016-12-09 22:35                         ` Haiyang Zhang
  2016-12-10  0:21                           ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-09 22:35 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, December 9, 2016 5:05 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com; jasowang@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 21:53:49 +0000
> Haiyang Zhang <haiyangz@microsoft.com> wrote:
> 
> > > -----Original Message-----
> > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > Sent: Friday, December 9, 2016 4:45 PM
> > > To: Haiyang Zhang <haiyangz@microsoft.com>
> > > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > > <kys@microsoft.com>; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org;
> > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > >
> > > On Fri, 9 Dec 2016 21:31:25 +0000
> > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > >
> > > > > -----Original Message-----
> > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > Sent: Friday, December 9, 2016 3:30 PM
> > > > > To: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > Cc: Greg KH <gregkh@linuxfoundation.org>; KY Srinivasan
> > > > > <kys@microsoft.com>; olaf@aepfle.de; linux-
> kernel@vger.kernel.org;
> > > > > bjorn.helgaas@gmail.com; apw@canonical.com;
> > > devel@linuxdriverproject.org;
> > > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based
> on
> > > > > serial numbers
> > > > >
> > > > > On Fri, 9 Dec 2016 20:09:49 +0000
> > > > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> > > > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > > > To: Greg KH <gregkh@linuxfoundation.org>
> > > > > > > Cc: KY Srinivasan <kys@microsoft.com>; olaf@aepfle.de;
> Haiyang
> > > Zhang
> > > > > > > <haiyangz@microsoft.com>; linux-kernel@vger.kernel.org;
> > > > > > > bjorn.helgaas@gmail.com; apw@canonical.com;
> > > > > devel@linuxdriverproject.org;
> > > > > > > leann.ogasawara@canonical.com; jasowang@redhat.com
> > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching
> based
> > > on
> > > > > > > serial numbers
> > > > > > >
> > > > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > > > Greg KH <gregkh@linuxfoundation.org> wrote:
> > > > > > >
> > > > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +0000, KY Srinivasan
> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > > > To: KY Srinivasan <kys@microsoft.com>
> > > > > > > > > > Cc: linux-kernel@vger.kernel.org;
> > > devel@linuxdriverproject.org;
> > > > > > > > > > olaf@aepfle.de; apw@canonical.com; vkuznets@redhat.com;
> > > > > > > > > > jasowang@redhat.com; leann.ogasawara@canonical.com;
> > > > > > > > > > bjorn.helgaas@gmail.com; Haiyang Zhang
> > > <haiyangz@microsoft.com>
> > > > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF
> matching
> > > > > based on
> > > > > > > serial
> > > > > > > > > > numbers
> > > > > > > > > >
> > > > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> > > > > > > kys@exchange.microsoft.com
> > > > > > > > > > wrote:
> > > > > > > > > > > From: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > > > >
> > > > > > > > > > > We currently use MAC address to match VF and
> synthetic
> > > NICs.
> > > > > > > Hyper-V
> > > > > > > > > > > provides a serial number to both devices for this
> > > purpose.
> > > > > This
> > > > > > > patch
> > > > > > > > > > > implements the matching based on VF serial numbers.
> This
> > > is
> > > > > the
> > > > > > > way
> > > > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > > > > > > > > > > Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > > > > > > > ++++++++++++++++++++++++++++++++++++---
> > > > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > @@ -1165,9 +1165,10 @@ static void
> > > netvsc_free_netdev(struct
> > > > > > > > > > net_device *netdev)
> > > > > > > > > > >  	free_netdev(netdev);
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8
> > > *mac)
> > > > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32
> vfser)
> > > > > > > > > > >  {
> > > > > > > > > > >  	struct net_device *dev;
> > > > > > > > > > > +	struct net_device_context *ndev_ctx;
> > > > > > > > > > >
> > > > > > > > > > >  	ASSERT_RTNL();
> > > > > > > > > > >
> > > > > > > > > > > @@ -1175,7 +1176,8 @@ static void
> > > netvsc_free_netdev(struct
> > > > > > > net_device
> > > > > > > > > > *netdev)
> > > > > > > > > > >  		if (dev->netdev_ops != &device_ops)
> > > > > > > > > > >  			continue;	/* not a netvsc device */
> > > > > > > > > > >
> > > > > > > > > > > -		if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > > > > +		ndev_ctx = netdev_priv(dev);
> > > > > > > > > > > +		if (ndev_ctx->vf_serial == vfser)
> > > > > > > > > > >  			return dev;
> > > > > > > > > > >  	}
> > > > > > > > > > >
> > > > > > > > > > > @@ -1205,21 +1207,66 @@ static void
> > > > > netvsc_free_netdev(struct
> > > > > > > > > > net_device *netdev)
> > > > > > > > > > >  	return NULL;
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > +static u32 netvsc_get_vfser(struct net_device
> > > *vf_netdev)
> > > > > > > > > > > +{
> > > > > > > > > > > +	struct device *dev;
> > > > > > > > > > > +	struct hv_device *hdev;
> > > > > > > > > > > +	struct hv_pcibus_device *hbus = NULL;
> > > > > > > > > > > +	struct list_head *iter;
> > > > > > > > > > > +	struct hv_pci_dev *hpdev;
> > > > > > > > > > > +	unsigned long flags;
> > > > > > > > > > > +	u32 vfser = 0;
> > > > > > > > > > > +	u32 count = 0;
> > > > > > > > > > > +
> > > > > > > > > > > +	for (dev = &vf_netdev->dev; dev; dev = dev->parent)
> > > {
> > > > > > > > > >
> > > > > > > > > > You are going to walk the whole device tree backwards?
> > > That's
> > > > > > > crazy.
> > > > > > > > > > And foolish.  And racy and broken (what happens if the
> > > tree
> > > > > > > changes
> > > > > > > > > > while you do this?)  Where is the lock being grabbed
> while
> > > > > this
> > > > > > > happens?
> > > > > > > > > > What about reference counts?  Do you see other drivers
> > > ever
> > > > > doing
> > > > > > > this
> > > > > > > > > > (if you do, point them out and I'll go yell at them
> too...)
> > > > > > > > >
> > > > > > > > > Greg,
> > > > > > > > >
> > > > > > > > > We are registering for netdev events. Coming into this
> > > function,
> > > > > the
> > > > > > > caller
> > > > > > > > > guarantees that the list of netdevs does not change - we
> > > assert
> > > > > this
> > > > > > > on entry:
> > > > > > > > > ASSERT_RTNL(). We are only walking up the device tree
> for
> > > the
> > > > > > > netdevs whose
> > > > > > > > > state change is being notified to us - the device tree
> being
> > > > > walked
> > > > > > > here is limited to
> > > > > > > > > netdevs under question.
> > > > > > > >
> > > > > > > > But a netdev is a child of some type of "real" device, and
> you
> > > are
> > > > > now
> > > > > > > > walking the tree of all devices up to the "root" parent
> device,
> > > > > which
> > > > > > > > means you will hit PCI bridges, USB controllers, and all
> sorts
> > > of
> > > > > fun
> > > > > > > > things if you are a child of those types of devices.
> > > > > > > >
> > > > > > > > And can't you tell if the netdev for this event, really is
> > > "your"
> > > > > > > > netdev?  Or are you getting called this for "all" netdevs?
> > > Sorry,
> > > > > I
> > > > > > > > don't know this api, any pointers to it would be
> appreciated.
> > > > > > > >
> > > > > > > > > We have a reference to the device and we know the device
> is
> > > not
> > > > > > > going away. Is it not
> > > > > > > > > safe to dereference the parent pointer - after all the
> child
> > > has
> > > > > > > taken a reference on
> > > > > > > > > the parent as part of  device_add() call.
> > > > > > > >
> > > > > > > > It might be, and might not be.  There's a reason you don't
> see
> > > > > this
> > > > > > > > pattern anywhere in the kernel because of this...
> > > > > > > >
> > > > > > > > > > > +		if (!dev_is_vmbus(dev))
> > > > > > > > > > > +			continue;
> > > > > > > > > >
> > > > > > > > > > Ick.
> > > > > > > > > >
> > > > > > > > > > Why isn't your parent pointer a vmbus device all the
> time?
> > > > > How
> > > > > > > could
> > > > > > > > > > you get burried down in the device hierarchy when you
> are
> > > the
> > > > > > > driver for
> > > > > > > > > > a specific bus type in the first place?  How could
> this
> > > > > function
> > > > > > > ever be
> > > > > > > > > > called for a device that is NOT of this type?
> > > > > > > > >
> > > > > > > > > We get notified when state changes on any of the netdev
> > > devices
> > > > > in
> > > > > > > the system.
> > > > > > > > > Not all netdevs in the system belong to vmbus. Consider
> for
> > > > > instance
> > > > > > > the
> > > > > > > > > emulated NIC that can be configured. This is an emulated
> PCI
> > > NIC.
> > > > > We
> > > > > > > are only
> > > > > > > > > interested in netdevs that correspond to the VF instance
> > > that we
> > > > > are
> > > > > > > interested in.
> > > > > > > >
> > > > > > > > Can you "know" this is your netdev by some other way than
> > > having
> > > > > to
> > > > > > > walk
> > > > > > > > the device tree?  Name?  local device type?  Something
> else?
> > > This
> > > > > > > seems
> > > > > > > > like an odd api in that everyone would have to do
> gyrations
> > > like
> > > > > this
> > > > > > > in
> > > > > > > > order to determine if the netdev is "theirs" or not...
> > > > > > >
> > > > > > > The scenario is SR-IOV on Hyper-V. In the case of VF device,
> the
> > > > > host
> > > > > > > hands the
> > > > > > > guest OS a PCI device for the virtual function device. The
> VF
> > > device
> > > > > is
> > > > > > > placed
> > > > > > > on a special synthetic PCI bus (ie not part of the other
> buses
> > > on
> > > > > the
> > > > > > > system).
> > > > > > > The VF device also has a synthetic network interface (netvsc)
> > > which
> > > > > > > lives
> > > > > > > on VMBUS.  This code is about managing the interaction
> between
> > > the
> > > > > two.
> > > > > > >
> > > > > > > The association between VF and synthetic NIC is done in
> response
> > > to
> > > > > the
> > > > > > > VF network device being registered. Initial version was
> based on
> > > MAC
> > > > > > > address
> > > > > > > which is the same.  Later refinement used permanent MAC
> address
> > > to
> > > > > > > avoid bugs if MAC address changed.  This version is to use
> > > serial
> > > > > number
> > > > > > > instead which is safer than MAC address.
> > > > > > >
> > > > > > > The code to walk up/down maybe not be needed to find serial
> > > number.
> > > > > > > Perhaps a more direct single set of conditions is possible?
> > > > > > >
> > > > > > > Something like:
> > > > > > >
> > > > > > > In pci-hyperv.c
> > > > > > >
> > > > > > > u32 hv_pcifront_get_serial(struct pci_bus *bus, unsigned int
> > > devfn)
> > > > > > > {
> > > > > > > 	struct hv_pcibus_device *hbus
> > > > > > > 		= container_of(bus->sysdata,
> > > > > > > 			       struct hv_pcibus_device, sysdata);
> > > > > > > 	struct hf_pci_dev *hpdev;
> > > > > > > 	u32 serial;
> > > > > > >
> > > > > > > 	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev-
> >devfn));
> > > > > > > 	if (!hpdev)
> > > > > > > 		return 0;
> > > > > > >
> > > > > > > 	serial = hpdev->devs.ser;
> > > > > > > 	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
> > > > > > > 	return serial;
> > > > > > > }
> > > > > > >
> > > > > > > In netvsc_drv.c
> > > > > > >
> > > > > > > static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > > > {
> > > > > > > 	struct device *dev = vf_netdev->dev.parent;
> > > > > > > 	struct pci_dev *pdev;
> > > > > > > 	u32 wslot;
> > > > > > >
> > > > > > > 	if (!dev || !dev_is_pci(dev))
> > > > > > > 		return 0;
> > > > > > >
> > > > > > > 	pdev = container_of(dev, struct pci_device, dev);
> > > > > > >
> > > > > > > 	return hv_pcifront_get_serial(pdev->bus, pdev->devfn);
> > > > > > > }
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > P.S: it would be good to be able to get win_slot out through
> > > sysfs
> > > > > as
> > > > > > > well for systemd/udev.
> > > > > >
> > > > > > Stephen,
> > > > > >
> > > > > > Thanks for suggestion. Actually, in my earlier implementation
> of
> > > this
> > > > > > feature (VF serial based matching), I thought about export a
> > > function
> > > > > > from vPCI driver, then calling it from netvsc. So I don't need
> to
> > > > > > move structs between headers... But, it creates a dependency
> of
> > > netvsc
> > > > > > on vPCI driver's symbol. So, even if on a VM without SRIOV, we
> > > have to
> > > > > > load vPCI driver, which we don't want.
> > > > > >
> > > > > > Also, hv_vpci device is 3 parent layers above the vf_netdevice:
> > > > > > Here is the VF drv hierarchy --
> > > > > > Should we assume it's always 3 parents above vf_netdevice, or
> > > search
> > > > > for it?
> > > > > >
> > > > > > [  368.185259] HZINFO:NETDEV_REGISTER:
> > > > > > [  368.185261] HZINFO: dev:ffff88007c10d518, bus:
> (null),
> > > > > busName:(null), drvName:(null)
> > > > > > [  368.185262] HZINFO: dev:ffff88007c10c0a0,
> bus:ffffffff81ce4b60,
> > > > > busName:pci, drvName:ixgbevf
> > > > > > [  368.185263] HZINFO: dev:ffff8800355c0000, bus:
> (null),
> > > > > busName:(null), drvName:(null)
> > > > > > [  368.185264] HZINFO: dev:ffff8800355c5428,
> bus:ffffffffa0008160,
> > > > > busName:vmbus, drvName:hv_pci
> > > > > > [  368.185264] HZINFO: dev:ffff88007c49e268,
> bus:ffffffff81ce9800,
> > > > > busName:acpi, drvName:vmbus
> > > > > > [  368.185265] HZINFO: dev:ffff88007c48ea68,
> bus:ffffffff81ce9800,
> > > > > busName:acpi, drvName:(null)
> > > > > > [  368.185266] HZINFO: dev:ffff88007c48aa68,
> bus:ffffffff81ce9800,
> > > > > busName:acpi, drvName:(null)
> > > > > > [  368.185266] HZINFO: dev:ffff88007c48a268,
> bus:ffffffff81ce9800,
> > > > > busName:acpi, drvName:(null)
> > > > > > [  368.185267] HZINFO: dev:ffff88007c489a68,
> bus:ffffffff81ce9800,
> > > > > busName:acpi, drvName:(null)
> > > > > >
> > > > > > Thanks,
> > > > > > - Haiyang
> > > > >
> > > > > Since this is a synthetic bus, the topology should not change
> unless
> > > > > host side
> > > > > software changes. The vf_netdev device has to be PCI device, so
> that
> > > is
> > > > > going to
> > > > > be certain.  After that there maybe intermediate up to hv_pci.
> The
> > > code
> > > > > in hyperv-pci
> > > > > already has similar stuff (ie for read_config).
> > > >
> > > > Other netdevice, like emulated NIC can also trigger this
> notification.
> > > > They are not vPCI.
> > > >
> > > > Thanks,
> > > > - Haiyang
> > >
> > > Emulated NIC is already excluded in start of netvc notifier handler.
> > >
> > > static int netvsc_netdev_event(struct notifier_block *this,
> > > 			       unsigned long event, void *ptr)
> > > {
> > > 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > >
> > > 	/* Skip our own events */
> > > 	if (event_dev->netdev_ops == &device_ops)
> > > 		return NOTIFY_DONE;
> > >
> >
> > Emulated device is not based on netvsc. It's the native Linux
> (dec100M?)
> > Driver. So this line doesn't exclude it. And how about other NIC type
> > may be added in the future?
> 
> Sorry, forgot about that haven't used emulated device in years.
> The emulated device should appear to be on a PCI bus, but the serial
> would not match??

It's not a vmbus device, not a hv_pci device either. Hv_PCI is a subset
of vmbus devices. So emulated NIC won't have hv_pci serial number.

In my patch, the following code ensure, we only try to get serial number
after confirming it's vmbus and hv_pci device:

+               if (!dev_is_vmbus(dev))
+                       continue;
+
+               hdev = device_to_hv_device(dev);
+               if (hdev->device_id != HV_PCIE)
+                       continue;

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-09 22:35                         ` Haiyang Zhang
@ 2016-12-10  0:21                           ` Stephen Hemminger
  2016-12-10 12:20                             ` Greg KH
  0 siblings, 1 reply; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-10  0:21 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Greg KH, KY Srinivasan, olaf, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara, jasowang

On Fri, 9 Dec 2016 22:35:05 +0000
Haiyang Zhang <haiyangz@microsoft.com> wrote:

> > > >
> > > > Emulated NIC is already excluded in start of netvc notifier handler.
> > > >
> > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > 			       unsigned long event, void *ptr)
> > > > {
> > > > 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > > >
> > > > 	/* Skip our own events */
> > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > 		return NOTIFY_DONE;
> > > >  
> > >
> > > Emulated device is not based on netvsc. It's the native Linux  
> > (dec100M?)  
> > > Driver. So this line doesn't exclude it. And how about other NIC type
> > > may be added in the future?  
> > 
> > Sorry, forgot about that haven't used emulated device in years.
> > The emulated device should appear to be on a PCI bus, but the serial
> > would not match??  
> 
> It's not a vmbus device, not a hv_pci device either. Hv_PCI is a subset
> of vmbus devices. So emulated NIC won't have hv_pci serial number.
> 
> In my patch, the following code ensure, we only try to get serial number
> after confirming it's vmbus and hv_pci device:
> 
> +               if (!dev_is_vmbus(dev))
> +                       continue;
> +
> +               hdev = device_to_hv_device(dev);
> +               if (hdev->device_id != HV_PCIE)
> +                       continue;

Ok, the walk back up the device tree is logically ok, but I don't
know enough about PCI device tree to be assured that it is safe.
Also, you could short circuit away most of the unwanted devices
by making sure the vf_netdev->dev.parent is a PCI device.

Also the loop to look for serial number in the devices on the
hv_pci bus could be made a separate function and have a short circuit
return (although it probably doesn't matter since there will only
be on e PCI VF device per bus there).

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-10  0:21                           ` Stephen Hemminger
@ 2016-12-10 12:20                             ` Greg KH
  2016-12-14 23:18                               ` Haiyang Zhang
  0 siblings, 1 reply; 29+ messages in thread
From: Greg KH @ 2016-12-10 12:20 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Haiyang Zhang, olaf, jasowang, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara

On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:
> On Fri, 9 Dec 2016 22:35:05 +0000
> Haiyang Zhang <haiyangz@microsoft.com> wrote:
> 
> > > > >
> > > > > Emulated NIC is already excluded in start of netvc notifier handler.
> > > > >
> > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > 			       unsigned long event, void *ptr)
> > > > > {
> > > > > 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > > > >
> > > > > 	/* Skip our own events */
> > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > 		return NOTIFY_DONE;
> > > > >  
> > > >
> > > > Emulated device is not based on netvsc. It's the native Linux  
> > > (dec100M?)  
> > > > Driver. So this line doesn't exclude it. And how about other NIC type
> > > > may be added in the future?  
> > > 
> > > Sorry, forgot about that haven't used emulated device in years.
> > > The emulated device should appear to be on a PCI bus, but the serial
> > > would not match??  
> > 
> > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a subset
> > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > 
> > In my patch, the following code ensure, we only try to get serial number
> > after confirming it's vmbus and hv_pci device:
> > 
> > +               if (!dev_is_vmbus(dev))
> > +                       continue;
> > +
> > +               hdev = device_to_hv_device(dev);
> > +               if (hdev->device_id != HV_PCIE)
> > +                       continue;
> 
> Ok, the walk back up the device tree is logically ok, but I don't
> know enough about PCI device tree to be assured that it is safe.
> Also, you could short circuit away most of the unwanted devices
> by making sure the vf_netdev->dev.parent is a PCI device.

Ugh, this seems really really messy.  Can't we just have the
netdev_event interface pass back a pointer to something that we "know"
what it is?  This walking the device tree is a mess, and not good.

I'd even argue that dev_is_pci() needs to be removed from the tree too,
as it shouldn't be needed either.  We did a lot of work on the driver
model to prevent the need for having to declare the "type" of 'struct
device' at all, and by doing this type of thing it goes against the
basic design of the model.

Yes, it makes things a bit "tougher" in places, but you don't do crazy
things like walk device trees to try to find random devices and then
think it's safe to actually use them :(

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-10 12:20                             ` Greg KH
@ 2016-12-14 23:18                               ` Haiyang Zhang
  2016-12-14 23:27                                 ` Greg KH
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-14 23:18 UTC (permalink / raw)
  To: Greg KH, Stephen Hemminger
  Cc: olaf, jasowang, linux-kernel, bjorn.helgaas, apw, devel, leann.ogasawara



> -----Original Message-----
> From: Greg KH [mailto:gregkh@linuxfoundation.org]
> Sent: Saturday, December 10, 2016 7:21 AM
> To: Stephen Hemminger <stephen@networkplumber.org>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; olaf@aepfle.de;
> jasowang@redhat.com; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:
> > On Fri, 9 Dec 2016 22:35:05 +0000
> > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> >
> > > > > >
> > > > > > Emulated NIC is already excluded in start of netvc notifier
> handler.
> > > > > >
> > > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > > 			       unsigned long event, void *ptr)
> > > > > > {
> > > > > > 	struct net_device *event_dev =
> netdev_notifier_info_to_dev(ptr);
> > > > > >
> > > > > > 	/* Skip our own events */
> > > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > > 		return NOTIFY_DONE;
> > > > > >
> > > > >
> > > > > Emulated device is not based on netvsc. It's the native Linux
> > > > (dec100M?)
> > > > > Driver. So this line doesn't exclude it. And how about other NIC
> type
> > > > > may be added in the future?
> > > >
> > > > Sorry, forgot about that haven't used emulated device in years.
> > > > The emulated device should appear to be on a PCI bus, but the
> serial
> > > > would not match??
> > >
> > > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a
> subset
> > > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > >
> > > In my patch, the following code ensure, we only try to get serial
> number
> > > after confirming it's vmbus and hv_pci device:
> > >
> > > +               if (!dev_is_vmbus(dev))
> > > +                       continue;
> > > +
> > > +               hdev = device_to_hv_device(dev);
> > > +               if (hdev->device_id != HV_PCIE)
> > > +                       continue;
> >
> > Ok, the walk back up the device tree is logically ok, but I don't
> > know enough about PCI device tree to be assured that it is safe.
> > Also, you could short circuit away most of the unwanted devices
> > by making sure the vf_netdev->dev.parent is a PCI device.
> 
> Ugh, this seems really really messy.  Can't we just have the
> netdev_event interface pass back a pointer to something that we "know"
> what it is?  This walking the device tree is a mess, and not good.
> 
> I'd even argue that dev_is_pci() needs to be removed from the tree too,
> as it shouldn't be needed either.  We did a lot of work on the driver
> model to prevent the need for having to declare the "type" of 'struct
> device' at all, and by doing this type of thing it goes against the
> basic design of the model.
> 
> Yes, it makes things a bit "tougher" in places, but you don't do crazy
> things like walk device trees to try to find random devices and then
> think it's safe to actually use them :(
> 

We register a notifier_block with:
	register_netdevice_notifier(struct notifier_block *nb)

The "struct notifier_block" basically contains a callback function:
struct notifier_block {
        notifier_fn_t notifier_call;
        struct notifier_block __rcu *next;
        int priority;
};

It doesn't specify which device we want, so all net devices can trigger
this event. Seems we can't have this notifier return VF device only.

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-14 23:18                               ` Haiyang Zhang
@ 2016-12-14 23:27                                 ` Greg KH
  2016-12-14 23:51                                   ` Stephen Hemminger
  0 siblings, 1 reply; 29+ messages in thread
From: Greg KH @ 2016-12-14 23:27 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Stephen Hemminger, olaf, jasowang, linux-kernel, bjorn.helgaas,
	apw, devel, leann.ogasawara

On Wed, Dec 14, 2016 at 11:18:59PM +0000, Haiyang Zhang wrote:
> 
> 
> > -----Original Message-----
> > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > Sent: Saturday, December 10, 2016 7:21 AM
> > To: Stephen Hemminger <stephen@networkplumber.org>
> > Cc: Haiyang Zhang <haiyangz@microsoft.com>; olaf@aepfle.de;
> > jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > leann.ogasawara@canonical.com
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> > 
> > On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:
> > > On Fri, 9 Dec 2016 22:35:05 +0000
> > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > >
> > > > > > >
> > > > > > > Emulated NIC is already excluded in start of netvc notifier
> > handler.
> > > > > > >
> > > > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > > > 			       unsigned long event, void *ptr)
> > > > > > > {
> > > > > > > 	struct net_device *event_dev =
> > netdev_notifier_info_to_dev(ptr);
> > > > > > >
> > > > > > > 	/* Skip our own events */
> > > > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > > > 		return NOTIFY_DONE;
> > > > > > >
> > > > > >
> > > > > > Emulated device is not based on netvsc. It's the native Linux
> > > > > (dec100M?)
> > > > > > Driver. So this line doesn't exclude it. And how about other NIC
> > type
> > > > > > may be added in the future?
> > > > >
> > > > > Sorry, forgot about that haven't used emulated device in years.
> > > > > The emulated device should appear to be on a PCI bus, but the
> > serial
> > > > > would not match??
> > > >
> > > > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a
> > subset
> > > > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > > >
> > > > In my patch, the following code ensure, we only try to get serial
> > number
> > > > after confirming it's vmbus and hv_pci device:
> > > >
> > > > +               if (!dev_is_vmbus(dev))
> > > > +                       continue;
> > > > +
> > > > +               hdev = device_to_hv_device(dev);
> > > > +               if (hdev->device_id != HV_PCIE)
> > > > +                       continue;
> > >
> > > Ok, the walk back up the device tree is logically ok, but I don't
> > > know enough about PCI device tree to be assured that it is safe.
> > > Also, you could short circuit away most of the unwanted devices
> > > by making sure the vf_netdev->dev.parent is a PCI device.
> > 
> > Ugh, this seems really really messy.  Can't we just have the
> > netdev_event interface pass back a pointer to something that we "know"
> > what it is?  This walking the device tree is a mess, and not good.
> > 
> > I'd even argue that dev_is_pci() needs to be removed from the tree too,
> > as it shouldn't be needed either.  We did a lot of work on the driver
> > model to prevent the need for having to declare the "type" of 'struct
> > device' at all, and by doing this type of thing it goes against the
> > basic design of the model.
> > 
> > Yes, it makes things a bit "tougher" in places, but you don't do crazy
> > things like walk device trees to try to find random devices and then
> > think it's safe to actually use them :(
> > 
> 
> We register a notifier_block with:
> 	register_netdevice_notifier(struct notifier_block *nb)
> 
> The "struct notifier_block" basically contains a callback function:
> struct notifier_block {
>         notifier_fn_t notifier_call;
>         struct notifier_block __rcu *next;
>         int priority;
> };
> 
> It doesn't specify which device we want, so all net devices can trigger
> this event. Seems we can't have this notifier return VF device only.

Ok, I dug in the kernel and it looks like people check the netdev_ops
structure to see if it matches up with their function pointers to "know"
if this is their device or not.  Why not do that here?  Or compare the
"string" of the driver name?  Or any other such trick that the drivers
that call register_netdevice_notifier do?

All of which are more sane than walking the device tree...

And why am I having to do network driver development, ick ick ick :)

Come on, 'git grep' is your friend.  Even better yet, use a good tool
like 'vgrep' which makes git grep work really really well.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-14 23:27                                 ` Greg KH
@ 2016-12-14 23:51                                   ` Stephen Hemminger
  2016-12-16  1:11                                     ` KY Srinivasan
  2016-12-16 16:45                                     ` Greg KH
  0 siblings, 2 replies; 29+ messages in thread
From: Stephen Hemminger @ 2016-12-14 23:51 UTC (permalink / raw)
  To: Greg KH
  Cc: Haiyang Zhang, olaf, jasowang, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara

On Wed, 14 Dec 2016 15:27:58 -0800
Greg KH <gregkh@linuxfoundation.org> wrote:

> On Wed, Dec 14, 2016 at 11:18:59PM +0000, Haiyang Zhang wrote:
> > 
> >   
> > > -----Original Message-----
> > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > Sent: Saturday, December 10, 2016 7:21 AM
> > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > Cc: Haiyang Zhang <haiyangz@microsoft.com>; olaf@aepfle.de;
> > > jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > > leann.ogasawara@canonical.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > > 
> > > On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:  
> > > > On Fri, 9 Dec 2016 22:35:05 +0000
> > > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > > >  
> > > > > > > >
> > > > > > > > Emulated NIC is already excluded in start of netvc notifier  
> > > handler.  
> > > > > > > >
> > > > > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > > > > 			       unsigned long event, void *ptr)
> > > > > > > > {
> > > > > > > > 	struct net_device *event_dev =  
> > > netdev_notifier_info_to_dev(ptr);  
> > > > > > > >
> > > > > > > > 	/* Skip our own events */
> > > > > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > > > > 		return NOTIFY_DONE;
> > > > > > > >  
> > > > > > >
> > > > > > > Emulated device is not based on netvsc. It's the native Linux  
> > > > > > (dec100M?)  
> > > > > > > Driver. So this line doesn't exclude it. And how about other NIC  
> > > type  
> > > > > > > may be added in the future?  
> > > > > >
> > > > > > Sorry, forgot about that haven't used emulated device in years.
> > > > > > The emulated device should appear to be on a PCI bus, but the  
> > > serial  
> > > > > > would not match??  
> > > > >
> > > > > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a  
> > > subset  
> > > > > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > > > >
> > > > > In my patch, the following code ensure, we only try to get serial  
> > > number  
> > > > > after confirming it's vmbus and hv_pci device:
> > > > >
> > > > > +               if (!dev_is_vmbus(dev))
> > > > > +                       continue;
> > > > > +
> > > > > +               hdev = device_to_hv_device(dev);
> > > > > +               if (hdev->device_id != HV_PCIE)
> > > > > +                       continue;  
> > > >
> > > > Ok, the walk back up the device tree is logically ok, but I don't
> > > > know enough about PCI device tree to be assured that it is safe.
> > > > Also, you could short circuit away most of the unwanted devices
> > > > by making sure the vf_netdev->dev.parent is a PCI device.  
> > > 
> > > Ugh, this seems really really messy.  Can't we just have the
> > > netdev_event interface pass back a pointer to something that we "know"
> > > what it is?  This walking the device tree is a mess, and not good.
> > > 
> > > I'd even argue that dev_is_pci() needs to be removed from the tree too,
> > > as it shouldn't be needed either.  We did a lot of work on the driver
> > > model to prevent the need for having to declare the "type" of 'struct
> > > device' at all, and by doing this type of thing it goes against the
> > > basic design of the model.
> > > 
> > > Yes, it makes things a bit "tougher" in places, but you don't do crazy
> > > things like walk device trees to try to find random devices and then
> > > think it's safe to actually use them :(
> > >   
> > 
> > We register a notifier_block with:
> > 	register_netdevice_notifier(struct notifier_block *nb)
> > 
> > The "struct notifier_block" basically contains a callback function:
> > struct notifier_block {
> >         notifier_fn_t notifier_call;
> >         struct notifier_block __rcu *next;
> >         int priority;
> > };
> > 
> > It doesn't specify which device we want, so all net devices can trigger
> > this event. Seems we can't have this notifier return VF device only.  
> 
> Ok, I dug in the kernel and it looks like people check the netdev_ops
> structure to see if it matches up with their function pointers to "know"
> if this is their device or not.  Why not do that here?  Or compare the
> "string" of the driver name?  Or any other such trick that the drivers
> that call register_netdevice_notifier do?
> 
> All of which are more sane than walking the device tree...
> 
> And why am I having to do network driver development, ick ick ick :)
> 
> Come on, 'git grep' is your friend.  Even better yet, use a good tool
> like 'vgrep' which makes git grep work really really well.

Normally, that would work but in this case we have one driver (netvsc)
which is managing another driver which is unaware of Hyper-V or netvsc
drivers existence.  The callback is happening in netvsc driver and it
needs to say "hey I know that SR-IOV device, it is associated with my
network device". This problem is how to know that N is associated with
V? The V device has to be a network device, that is easy. But then it
also has to be a PCI device, not to bad. But then the netvsc code
is matching based on hyper-V only PCI bus metadata (the serial #).

The Microsoft developers made the rational decision not to go modifying
all the possible SR-IOV network devices from Intel and Mellanox to add
the functionality there. That would have been much worse.

Maybe, rather than trying to do the management in the kernel it
could have been done better in user space. Unfortunately, this would
only move the problem.  The PCI-hyperv host driver could expose serial
value through sysfs (with some pain). But the problem would be how
to make a new API to  join the two V and N device.  Doing a private
ioctl is worse than the notifier.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-14 23:51                                   ` Stephen Hemminger
@ 2016-12-16  1:11                                     ` KY Srinivasan
  2016-12-16 15:20                                       ` Haiyang Zhang
  2016-12-16 16:45                                     ` Greg KH
  1 sibling, 1 reply; 29+ messages in thread
From: KY Srinivasan @ 2016-12-16  1:11 UTC (permalink / raw)
  To: Stephen Hemminger, Greg KH
  Cc: olaf, jasowang, linux-kernel, bjorn.helgaas, apw, devel,
	leann.ogasawara, Haiyang Zhang



> -----Original Message-----
> From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org] On
> Behalf Of Stephen Hemminger
> Sent: Wednesday, December 14, 2016 3:52 PM
> To: Greg KH <gregkh@linuxfoundation.org>
> Cc: olaf@aepfle.de; jasowang@redhat.com; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org; leann.ogasawara@canonical.com; Haiyang
> Zhang <haiyangz@microsoft.com>
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial
> numbers
> 
> On Wed, 14 Dec 2016 15:27:58 -0800
> Greg KH <gregkh@linuxfoundation.org> wrote:
> 
> > On Wed, Dec 14, 2016 at 11:18:59PM +0000, Haiyang Zhang wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > Sent: Saturday, December 10, 2016 7:21 AM
> > > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > > Cc: Haiyang Zhang <haiyangz@microsoft.com>; olaf@aepfle.de;
> > > > jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > > > bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org;
> > > > leann.ogasawara@canonical.com
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > > serial numbers
> > > >
> > > > On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:
> > > > > On Fri, 9 Dec 2016 22:35:05 +0000
> > > > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > > > >
> > > > > > > > >
> > > > > > > > > Emulated NIC is already excluded in start of netvc notifier
> > > > handler.
> > > > > > > > >
> > > > > > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > > > > > 			       unsigned long event, void *ptr)
> > > > > > > > > {
> > > > > > > > > 	struct net_device *event_dev =
> > > > netdev_notifier_info_to_dev(ptr);
> > > > > > > > >
> > > > > > > > > 	/* Skip our own events */
> > > > > > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > > > > > 		return NOTIFY_DONE;
> > > > > > > > >
> > > > > > > >
> > > > > > > > Emulated device is not based on netvsc. It's the native Linux
> > > > > > > (dec100M?)
> > > > > > > > Driver. So this line doesn't exclude it. And how about other NIC
> > > > type
> > > > > > > > may be added in the future?
> > > > > > >
> > > > > > > Sorry, forgot about that haven't used emulated device in years.
> > > > > > > The emulated device should appear to be on a PCI bus, but the
> > > > serial
> > > > > > > would not match??
> > > > > >
> > > > > > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a
> > > > subset
> > > > > > of vmbus devices. So emulated NIC won't have hv_pci serial
> number.
> > > > > >
> > > > > > In my patch, the following code ensure, we only try to get serial
> > > > number
> > > > > > after confirming it's vmbus and hv_pci device:
> > > > > >
> > > > > > +               if (!dev_is_vmbus(dev))
> > > > > > +                       continue;
> > > > > > +
> > > > > > +               hdev = device_to_hv_device(dev);
> > > > > > +               if (hdev->device_id != HV_PCIE)
> > > > > > +                       continue;
> > > > >
> > > > > Ok, the walk back up the device tree is logically ok, but I don't
> > > > > know enough about PCI device tree to be assured that it is safe.
> > > > > Also, you could short circuit away most of the unwanted devices
> > > > > by making sure the vf_netdev->dev.parent is a PCI device.
> > > >
> > > > Ugh, this seems really really messy.  Can't we just have the
> > > > netdev_event interface pass back a pointer to something that we
> "know"
> > > > what it is?  This walking the device tree is a mess, and not good.
> > > >
> > > > I'd even argue that dev_is_pci() needs to be removed from the tree
> too,
> > > > as it shouldn't be needed either.  We did a lot of work on the driver
> > > > model to prevent the need for having to declare the "type" of 'struct
> > > > device' at all, and by doing this type of thing it goes against the
> > > > basic design of the model.
> > > >
> > > > Yes, it makes things a bit "tougher" in places, but you don't do crazy
> > > > things like walk device trees to try to find random devices and then
> > > > think it's safe to actually use them :(
> > > >
> > >
> > > We register a notifier_block with:
> > > 	register_netdevice_notifier(struct notifier_block *nb)
> > >
> > > The "struct notifier_block" basically contains a callback function:
> > > struct notifier_block {
> > >         notifier_fn_t notifier_call;
> > >         struct notifier_block __rcu *next;
> > >         int priority;
> > > };
> > >
> > > It doesn't specify which device we want, so all net devices can trigger
> > > this event. Seems we can't have this notifier return VF device only.
> >
> > Ok, I dug in the kernel and it looks like people check the netdev_ops
> > structure to see if it matches up with their function pointers to "know"
> > if this is their device or not.  Why not do that here?  Or compare the
> > "string" of the driver name?  Or any other such trick that the drivers
> > that call register_netdevice_notifier do?
> >
> > All of which are more sane than walking the device tree...
> >
> > And why am I having to do network driver development, ick ick ick :)
> >
> > Come on, 'git grep' is your friend.  Even better yet, use a good tool
> > like 'vgrep' which makes git grep work really really well.
> 
> Normally, that would work but in this case we have one driver (netvsc)
> which is managing another driver which is unaware of Hyper-V or netvsc
> drivers existence.  The callback is happening in netvsc driver and it
> needs to say "hey I know that SR-IOV device, it is associated with my
> network device". This problem is how to know that N is associated with
> V? The V device has to be a network device, that is easy. But then it
> also has to be a PCI device, not to bad. But then the netvsc code
> is matching based on hyper-V only PCI bus metadata (the serial #).
> 
> The Microsoft developers made the rational decision not to go modifying
> all the possible SR-IOV network devices from Intel and Mellanox to add
> the functionality there. That would have been much worse.
> 
> Maybe, rather than trying to do the management in the kernel it
> could have been done better in user space. Unfortunately, this would
> only move the problem.  The PCI-hyperv host driver could expose serial
> value through sysfs (with some pain). But the problem would be how
> to make a new API to  join the two V and N device.  Doing a private
> ioctl is worse than the notifier.

All this has been discussed earlier in the thread. I think I have a solution
to the problem:
The only PCI (non-VF) NIC that may be present in the VM is the emulated NIC and
we know exactly the device ID and vendor ID of this NIC. Furthermore,
as a platform we are not going to be emulating additional NICs. So,
if the PCI NIC is not the emulated NIC, it must be a VF and we can extract the
serial number.

Regards,

K. Y
> _______________________________________________
> devel mailing list
> devel@linuxdriverproject.org
> https://na01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fdriverd
> ev.linuxdriverproject.org%2Fmailman%2Flistinfo%2Fdriverdev-
> devel&data=02%7C01%7Ckys%40microsoft.com%7C77c2c8a38fe2431945e408
> d4247c2c7d%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C63617356
> 3122444667&sdata=u5C0v7ixzRu%2Btw51tTzHNpbsNqCDQTpigzUtwahIPvE%
> 3D&reserved=0

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-16  1:11                                     ` KY Srinivasan
@ 2016-12-16 15:20                                       ` Haiyang Zhang
  2016-12-16 18:39                                         ` KY Srinivasan
  0 siblings, 1 reply; 29+ messages in thread
From: Haiyang Zhang @ 2016-12-16 15:20 UTC (permalink / raw)
  To: KY Srinivasan, Stephen Hemminger, Greg KH
  Cc: olaf, jasowang, linux-kernel, bjorn.helgaas, apw, devel, leann.ogasawara



> -----Original Message-----
> From: KY Srinivasan
> Sent: Thursday, December 15, 2016 8:11 PM
> To: Stephen Hemminger <stephen@networkplumber.org>; Greg KH
> <gregkh@linuxfoundation.org>
> Cc: olaf@aepfle.de; jasowang@redhat.com; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> leann.ogasawara@canonical.com; Haiyang Zhang <haiyangz@microsoft.com>
> Subject: RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> 
> 
> > -----Original Message-----
> > From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org] On
> > Behalf Of Stephen Hemminger
> > Sent: Wednesday, December 14, 2016 3:52 PM
> > To: Greg KH <gregkh@linuxfoundation.org>
> > Cc: olaf@aepfle.de; jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com;
> > devel@linuxdriverproject.org; leann.ogasawara@canonical.com; Haiyang
> > Zhang <haiyangz@microsoft.com>
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial
> > numbers
> >
> > Normally, that would work but in this case we have one driver (netvsc)
> > which is managing another driver which is unaware of Hyper-V or netvsc
> > drivers existence.  The callback is happening in netvsc driver and it
> > needs to say "hey I know that SR-IOV device, it is associated with my
> > network device". This problem is how to know that N is associated with
> > V? The V device has to be a network device, that is easy. But then it
> > also has to be a PCI device, not to bad. But then the netvsc code
> > is matching based on hyper-V only PCI bus metadata (the serial #).
> >
> > The Microsoft developers made the rational decision not to go
> modifying
> > all the possible SR-IOV network devices from Intel and Mellanox to add
> > the functionality there. That would have been much worse.
> >
> > Maybe, rather than trying to do the management in the kernel it
> > could have been done better in user space. Unfortunately, this would
> > only move the problem.  The PCI-hyperv host driver could expose serial
> > value through sysfs (with some pain). But the problem would be how
> > to make a new API to  join the two V and N device.  Doing a private
> > ioctl is worse than the notifier.
> 
> All this has been discussed earlier in the thread. I think I have a
> solution
> to the problem:
> The only PCI (non-VF) NIC that may be present in the VM is the emulated
> NIC and
> we know exactly the device ID and vendor ID of this NIC. Furthermore,
> as a platform we are not going to be emulating additional NICs. So,
> if the PCI NIC is not the emulated NIC, it must be a VF and we can
> extract the
> serial number.

How about direct pass-through NIC devices. Do they have vPCI serial number?
And, the numbers should be different from VF NIC?

Thanks,
- Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-14 23:51                                   ` Stephen Hemminger
  2016-12-16  1:11                                     ` KY Srinivasan
@ 2016-12-16 16:45                                     ` Greg KH
  1 sibling, 0 replies; 29+ messages in thread
From: Greg KH @ 2016-12-16 16:45 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Haiyang Zhang, olaf, jasowang, linux-kernel, bjorn.helgaas, apw,
	devel, leann.ogasawara

On Wed, Dec 14, 2016 at 03:51:34PM -0800, Stephen Hemminger wrote:
> On Wed, 14 Dec 2016 15:27:58 -0800
> Greg KH <gregkh@linuxfoundation.org> wrote:
> 
> > On Wed, Dec 14, 2016 at 11:18:59PM +0000, Haiyang Zhang wrote:
> > > 
> > >   
> > > > -----Original Message-----
> > > > From: Greg KH [mailto:gregkh@linuxfoundation.org]
> > > > Sent: Saturday, December 10, 2016 7:21 AM
> > > > To: Stephen Hemminger <stephen@networkplumber.org>
> > > > Cc: Haiyang Zhang <haiyangz@microsoft.com>; olaf@aepfle.de;
> > > > jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > > > bjorn.helgaas@gmail.com; apw@canonical.com; devel@linuxdriverproject.org;
> > > > leann.ogasawara@canonical.com
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > > serial numbers
> > > > 
> > > > On Fri, Dec 09, 2016 at 04:21:48PM -0800, Stephen Hemminger wrote:  
> > > > > On Fri, 9 Dec 2016 22:35:05 +0000
> > > > > Haiyang Zhang <haiyangz@microsoft.com> wrote:
> > > > >  
> > > > > > > > >
> > > > > > > > > Emulated NIC is already excluded in start of netvc notifier  
> > > > handler.  
> > > > > > > > >
> > > > > > > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > > > > > > > 			       unsigned long event, void *ptr)
> > > > > > > > > {
> > > > > > > > > 	struct net_device *event_dev =  
> > > > netdev_notifier_info_to_dev(ptr);  
> > > > > > > > >
> > > > > > > > > 	/* Skip our own events */
> > > > > > > > > 	if (event_dev->netdev_ops == &device_ops)
> > > > > > > > > 		return NOTIFY_DONE;
> > > > > > > > >  
> > > > > > > >
> > > > > > > > Emulated device is not based on netvsc. It's the native Linux  
> > > > > > > (dec100M?)  
> > > > > > > > Driver. So this line doesn't exclude it. And how about other NIC  
> > > > type  
> > > > > > > > may be added in the future?  
> > > > > > >
> > > > > > > Sorry, forgot about that haven't used emulated device in years.
> > > > > > > The emulated device should appear to be on a PCI bus, but the  
> > > > serial  
> > > > > > > would not match??  
> > > > > >
> > > > > > It's not a vmbus device, not a hv_pci device either. Hv_PCI is a  
> > > > subset  
> > > > > > of vmbus devices. So emulated NIC won't have hv_pci serial number.
> > > > > >
> > > > > > In my patch, the following code ensure, we only try to get serial  
> > > > number  
> > > > > > after confirming it's vmbus and hv_pci device:
> > > > > >
> > > > > > +               if (!dev_is_vmbus(dev))
> > > > > > +                       continue;
> > > > > > +
> > > > > > +               hdev = device_to_hv_device(dev);
> > > > > > +               if (hdev->device_id != HV_PCIE)
> > > > > > +                       continue;  
> > > > >
> > > > > Ok, the walk back up the device tree is logically ok, but I don't
> > > > > know enough about PCI device tree to be assured that it is safe.
> > > > > Also, you could short circuit away most of the unwanted devices
> > > > > by making sure the vf_netdev->dev.parent is a PCI device.  
> > > > 
> > > > Ugh, this seems really really messy.  Can't we just have the
> > > > netdev_event interface pass back a pointer to something that we "know"
> > > > what it is?  This walking the device tree is a mess, and not good.
> > > > 
> > > > I'd even argue that dev_is_pci() needs to be removed from the tree too,
> > > > as it shouldn't be needed either.  We did a lot of work on the driver
> > > > model to prevent the need for having to declare the "type" of 'struct
> > > > device' at all, and by doing this type of thing it goes against the
> > > > basic design of the model.
> > > > 
> > > > Yes, it makes things a bit "tougher" in places, but you don't do crazy
> > > > things like walk device trees to try to find random devices and then
> > > > think it's safe to actually use them :(
> > > >   
> > > 
> > > We register a notifier_block with:
> > > 	register_netdevice_notifier(struct notifier_block *nb)
> > > 
> > > The "struct notifier_block" basically contains a callback function:
> > > struct notifier_block {
> > >         notifier_fn_t notifier_call;
> > >         struct notifier_block __rcu *next;
> > >         int priority;
> > > };
> > > 
> > > It doesn't specify which device we want, so all net devices can trigger
> > > this event. Seems we can't have this notifier return VF device only.  
> > 
> > Ok, I dug in the kernel and it looks like people check the netdev_ops
> > structure to see if it matches up with their function pointers to "know"
> > if this is their device or not.  Why not do that here?  Or compare the
> > "string" of the driver name?  Or any other such trick that the drivers
> > that call register_netdevice_notifier do?
> > 
> > All of which are more sane than walking the device tree...
> > 
> > And why am I having to do network driver development, ick ick ick :)
> > 
> > Come on, 'git grep' is your friend.  Even better yet, use a good tool
> > like 'vgrep' which makes git grep work really really well.
> 
> Normally, that would work but in this case we have one driver (netvsc)
> which is managing another driver which is unaware of Hyper-V or netvsc
> drivers existence.

That's the root problem here :)

> The callback is happening in netvsc driver and it
> needs to say "hey I know that SR-IOV device, it is associated with my
> network device". This problem is how to know that N is associated with
> V? The V device has to be a network device, that is easy. But then it
> also has to be a PCI device, not to bad.

I'd argue that it is just as bad, as it shouldn't be poking around with
a random 'struct device' like that, but I'll let it slide...

> But then the netvsc code
> is matching based on hyper-V only PCI bus metadata (the serial #).

Which is a mess.

Again, walking the device tree like this is racy, broken, and shouldn't
be done anywhere.  You are crossing bus boundries and lots of bad things
could happen if a device was removed at the same time.  Just don't do
that.

> The Microsoft developers made the rational decision not to go modifying
> all the possible SR-IOV network devices from Intel and Mellanox to add
> the functionality there. That would have been much worse.

Why is that worse?  How many lines of code would that be?

> Maybe, rather than trying to do the management in the kernel it
> could have been done better in user space. Unfortunately, this would
> only move the problem.  The PCI-hyperv host driver could expose serial
> value through sysfs (with some pain). But the problem would be how
> to make a new API to  join the two V and N device.  Doing a private
> ioctl is worse than the notifier.

I still don't really understand the relationship between V and N, but it
feels like it is very tenous and sketchy and you should work to make it
more explicit.  We have the source to these drivers, do it correctly,
or, do something in the network bus layer to be able to properly
represent this heirachy so that it is "obvious" what is going on.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers
  2016-12-16 15:20                                       ` Haiyang Zhang
@ 2016-12-16 18:39                                         ` KY Srinivasan
  0 siblings, 0 replies; 29+ messages in thread
From: KY Srinivasan @ 2016-12-16 18:39 UTC (permalink / raw)
  To: Haiyang Zhang, Stephen Hemminger, Greg KH
  Cc: olaf, jasowang, linux-kernel, bjorn.helgaas, apw, devel, leann.ogasawara



> -----Original Message-----
> From: Haiyang Zhang
> Sent: Friday, December 16, 2016 7:21 AM
> To: KY Srinivasan <kys@microsoft.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Greg KH <gregkh@linuxfoundation.org>
> Cc: olaf@aepfle.de; jasowang@redhat.com; linux-kernel@vger.kernel.org;
> bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org; leann.ogasawara@canonical.com
> Subject: RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial
> numbers
> 
> 
> 
> > -----Original Message-----
> > From: KY Srinivasan
> > Sent: Thursday, December 15, 2016 8:11 PM
> > To: Stephen Hemminger <stephen@networkplumber.org>; Greg KH
> > <gregkh@linuxfoundation.org>
> > Cc: olaf@aepfle.de; jasowang@redhat.com; linux-kernel@vger.kernel.org;
> > bjorn.helgaas@gmail.com; apw@canonical.com;
> devel@linuxdriverproject.org;
> > leann.ogasawara@canonical.com; Haiyang Zhang
> <haiyangz@microsoft.com>
> > Subject: RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> >
> >
> >
> > > -----Original Message-----
> > > From: devel [mailto:driverdev-devel-bounces@linuxdriverproject.org]
> On
> > > Behalf Of Stephen Hemminger
> > > Sent: Wednesday, December 14, 2016 3:52 PM
> > > To: Greg KH <gregkh@linuxfoundation.org>
> > > Cc: olaf@aepfle.de; jasowang@redhat.com; linux-
> kernel@vger.kernel.org;
> > > bjorn.helgaas@gmail.com; apw@canonical.com;
> > > devel@linuxdriverproject.org; leann.ogasawara@canonical.com; Haiyang
> > > Zhang <haiyangz@microsoft.com>
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial
> > > numbers
> > >
> > > Normally, that would work but in this case we have one driver (netvsc)
> > > which is managing another driver which is unaware of Hyper-V or netvsc
> > > drivers existence.  The callback is happening in netvsc driver and it
> > > needs to say "hey I know that SR-IOV device, it is associated with my
> > > network device". This problem is how to know that N is associated with
> > > V? The V device has to be a network device, that is easy. But then it
> > > also has to be a PCI device, not to bad. But then the netvsc code
> > > is matching based on hyper-V only PCI bus metadata (the serial #).
> > >
> > > The Microsoft developers made the rational decision not to go
> > modifying
> > > all the possible SR-IOV network devices from Intel and Mellanox to add
> > > the functionality there. That would have been much worse.
> > >
> > > Maybe, rather than trying to do the management in the kernel it
> > > could have been done better in user space. Unfortunately, this would
> > > only move the problem.  The PCI-hyperv host driver could expose serial
> > > value through sysfs (with some pain). But the problem would be how
> > > to make a new API to  join the two V and N device.  Doing a private
> > > ioctl is worse than the notifier.
> >
> > All this has been discussed earlier in the thread. I think I have a
> > solution
> > to the problem:
> > The only PCI (non-VF) NIC that may be present in the VM is the emulated
> > NIC and
> > we know exactly the device ID and vendor ID of this NIC. Furthermore,
> > as a platform we are not going to be emulating additional NICs. So,
> > if the PCI NIC is not the emulated NIC, it must be a VF and we can
> > extract the
> > serial number.
> 
> How about direct pass-through NIC devices. Do they have vPCI serial
> number?
> And, the numbers should be different from VF NIC?

This may not have a valid serial number; but is still a descendent of vmbus.

K. Y  
> 
> Thanks,
> - Haiyang

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-12-16 18:57 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-08  8:33 [PATCH 0/3] Drivers: hv: Implement VF association based on serial number kys
2016-12-08  8:33 ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kys
2016-12-08  8:33   ` [PATCH 2/3] hyperv: Add a function to detect if the device is a vmbus dev kys
2016-12-08  8:33   ` [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers kys
2016-12-08 15:56     ` Greg KH
2016-12-09  0:05       ` KY Srinivasan
2016-12-09  7:31         ` Greg KH
2016-12-09 18:20           ` Stephen Hemminger
2016-12-09 20:09             ` Haiyang Zhang
2016-12-09 20:29               ` Stephen Hemminger
2016-12-09 21:31                 ` Haiyang Zhang
2016-12-09 21:45                   ` Stephen Hemminger
2016-12-09 21:53                     ` Haiyang Zhang
2016-12-09 22:05                       ` Stephen Hemminger
2016-12-09 22:35                         ` Haiyang Zhang
2016-12-10  0:21                           ` Stephen Hemminger
2016-12-10 12:20                             ` Greg KH
2016-12-14 23:18                               ` Haiyang Zhang
2016-12-14 23:27                                 ` Greg KH
2016-12-14 23:51                                   ` Stephen Hemminger
2016-12-16  1:11                                     ` KY Srinivasan
2016-12-16 15:20                                       ` Haiyang Zhang
2016-12-16 18:39                                         ` KY Srinivasan
2016-12-16 16:45                                     ` Greg KH
2016-12-08  9:49   ` [PATCH 1/3] hyperv: Move hv_pci_dev and related structs to hyperv.h kbuild test robot
2016-12-08 15:26     ` KY Srinivasan
2016-12-08 15:56       ` gregkh
2016-12-08 16:54         ` KY Srinivasan
2016-12-08 11:15   ` kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).