All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] hv_netvsc: automatically name slave VF network device
@ 2017-12-19 19:35 Stephen Hemminger
  2017-12-19 20:32 ` Jakub Kicinski
  2017-12-19 22:44 ` Samudrala, Sridhar
  0 siblings, 2 replies; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 19:35 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger

Rename the VF device to ethX_vf based on the ethX as the
synthetic device.  This eliminates the need for delay on setup,
and the PCI (udev based) naming is not reproducible on Hyper-V
anyway. The name of the VF does not matter since all control
operations take place the primary device. It does make the
user experience better to associate the names.

Based on feedback from all.systems.go talk.
The downside is that it requires exporting a symbol from netdev
core which makes it harder to backport.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |  1 -
 drivers/net/hyperv/netvsc_drv.c | 53 +++++++++++------------------------------
 net/core/dev.c                  |  1 +
 3 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 0db3bd1ea06f..5013189123e6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -762,7 +762,6 @@ struct net_device_context {
 	/* State to manage the associated VF interface. */
 	struct net_device __rcu *vf_netdev;
 	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
-	struct delayed_work vf_takeover;
 
 	/* 1: allocated, serial number is valid. 0: not allocated */
 	u32 vf_alloc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c5584c2d440e..8a0afe9a5064 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -49,7 +49,6 @@
 #define RING_SIZE_MIN		64
 
 #define LINKCHANGE_INT (2 * HZ)
-#define VF_TAKEOVER_INT (HZ / 10)
 
 static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, S_IRUGO);
@@ -1760,7 +1759,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 static int netvsc_vf_join(struct net_device *vf_netdev,
 			  struct net_device *ndev)
 {
-	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	char vf_name[IFNAMSIZ];
 	int ret;
 
 	ret = netdev_rx_handler_register(vf_netdev,
@@ -1783,23 +1782,14 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
 	/* set slave flag before open to prevent IPv6 addrconf */
 	vf_netdev->flags |= IFF_SLAVE;
 
-	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
-
 	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
 
-	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
-	return 0;
-
-upper_link_failed:
-	netdev_rx_handler_unregister(vf_netdev);
-rx_handler_failed:
-	return ret;
-}
-
-static void __netvsc_vf_setup(struct net_device *ndev,
-			      struct net_device *vf_netdev)
-{
-	int ret;
+	/* set the name of VF device based on upper device name */
+	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
+	ret = dev_change_name(vf_netdev, vf_name);
+	if (ret != 0)
+		netdev_warn(vf_netdev,
+			    "can not rename device: (%d)\n", ret);
 
 	/* Align MTU of VF with master */
 	ret = dev_set_mtu(vf_netdev, ndev->mtu);
@@ -1807,34 +1797,21 @@ static void __netvsc_vf_setup(struct net_device *ndev,
 		netdev_warn(vf_netdev,
 			    "unable to change mtu to %u\n", ndev->mtu);
 
+	/* If upper device is already up, bring up slave */
 	if (netif_running(ndev)) {
 		ret = dev_open(vf_netdev);
 		if (ret)
 			netdev_warn(vf_netdev,
 				    "unable to open: %d\n", ret);
 	}
-}
-
-/* Setup VF as slave of the synthetic device.
- * Runs in workqueue to avoid recursion in netlink callbacks.
- */
-static void netvsc_vf_setup(struct work_struct *w)
-{
-	struct net_device_context *ndev_ctx
-		= container_of(w, struct net_device_context, vf_takeover.work);
-	struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
-	struct net_device *vf_netdev;
-
-	if (!rtnl_trylock()) {
-		schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
-		return;
-	}
 
-	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
-	if (vf_netdev)
-		__netvsc_vf_setup(ndev, vf_netdev);
+	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+	return 0;
 
-	rtnl_unlock();
+ upper_link_failed:
+	netdev_rx_handler_unregister(vf_netdev);
+ rx_handler_failed:
+	return ret;
 }
 
 static int netvsc_register_vf(struct net_device *vf_netdev)
@@ -1904,7 +1881,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
-	cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
 
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
@@ -1947,7 +1923,6 @@ static int netvsc_probe(struct hv_device *dev,
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
-	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
 
 	net_device_ctx->vf_stats
 		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
diff --git a/net/core/dev.c b/net/core/dev.c
index c7db39926769..f51358a90efa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1238,6 +1238,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	return err;
 }
+EXPORT_SYMBOL(dev_change_name);
 
 /**
  *	dev_set_alias - change ifalias of a device
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 19:35 [RFC] hv_netvsc: automatically name slave VF network device Stephen Hemminger
@ 2017-12-19 20:32 ` Jakub Kicinski
  2017-12-19 20:44   ` Stephen Hemminger
  2017-12-19 22:44 ` Samudrala, Sridhar
  1 sibling, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 20:32 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger

On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:
> Rename the VF device to ethX_vf based on the ethX as the
> synthetic device.  This eliminates the need for delay on setup,
> and the PCI (udev based) naming is not reproducible on Hyper-V
> anyway. The name of the VF does not matter since all control
> operations take place the primary device. It does make the
> user experience better to associate the names.
> 
> Based on feedback from all.systems.go talk.
> The downside is that it requires exporting a symbol from netdev
> core which makes it harder to backport.
> 
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>

Why do you have to name the devices in the kernel space in the first
place? :/  Why don't upstream the correct change to biosdevname like
hardware vendors do?

Your VF setup is really _not_ special, I don't understand why we are 
OK with ignoring the standard practices.  Real enterprise distroes
are very careful never to break the naming of interfaces and they keep
the naming policy in user space.  Playing tricks in the kernel has every
chance of breaking existing user setups.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 20:32 ` Jakub Kicinski
@ 2017-12-19 20:44   ` Stephen Hemminger
  2017-12-19 21:18     ` Jakub Kicinski
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 20:44 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger

On Tue, 19 Dec 2017 12:32:34 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:
> > Rename the VF device to ethX_vf based on the ethX as the
> > synthetic device.  This eliminates the need for delay on setup,
> > and the PCI (udev based) naming is not reproducible on Hyper-V
> > anyway. The name of the VF does not matter since all control
> > operations take place the primary device. It does make the
> > user experience better to associate the names.
> > 
> > Based on feedback from all.systems.go talk.
> > The downside is that it requires exporting a symbol from netdev
> > core which makes it harder to backport.
> > 
> > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>  
> 
> Why do you have to name the devices in the kernel space in the first
> place? :/  Why don't upstream the correct change to biosdevname like
> hardware vendors do?

biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).
I assume you mean the modern application is udev, and it works but the name is meaningless
because it based of synthetic PCI information. The PCI host adapter is simulated
for pass through devices. Names like enp12s0.

Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
network device with same mac address. It is best to have the relationship
shown in the name.

> 
> Your VF setup is really _not_ special, I don't understand why we are 
> OK with ignoring the standard practices.  Real enterprise distroes
> are very careful never to break the naming of interfaces and they keep
> the naming policy in user space.  Playing tricks in the kernel has every
> chance of breaking existing user setups.

Actually, Systemd folks said "naming policy is in userspace only because
kernel can't get it right". Also there is no uniformity in userspace
there are at least 5 systems trying to do network setup. And most of
them depend on eth0 (yes still). Fixing userspace is impossible.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 20:44   ` Stephen Hemminger
@ 2017-12-19 21:18     ` Jakub Kicinski
  2017-12-19 21:29       ` Stephen Hemminger
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 21:18 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 12:44:25 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 12:32:34 -0800
> Jakub Kicinski <kubakici@wp.pl> wrote:
> 
> > On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:  
> > > Rename the VF device to ethX_vf based on the ethX as the
> > > synthetic device.  This eliminates the need for delay on setup,
> > > and the PCI (udev based) naming is not reproducible on Hyper-V
> > > anyway. The name of the VF does not matter since all control
> > > operations take place the primary device. It does make the
> > > user experience better to associate the names.
> > > 
> > > Based on feedback from all.systems.go talk.
> > > The downside is that it requires exporting a symbol from netdev
> > > core which makes it harder to backport.
> > > 
> > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>    
> > 
> > Why do you have to name the devices in the kernel space in the first
> > place? :/  Why don't upstream the correct change to biosdevname like
> > hardware vendors do?  
> 
> biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).

Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
back to information from the PCI VPD, which could be populated by 
the hypervisor.

> I assume you mean the modern application is udev, and it works but the name is meaningless
> because it based of synthetic PCI information. The PCI host adapter is simulated
> for pass through devices. Names like enp12s0.
> 
> Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
> network device with same mac address. It is best to have the relationship
> shown in the name.

How about we make the VF drivers expose "vf" as phys_port_name?
Then systemd/udev should glue that onto the name regardless of
how the VF is used.

> > Your VF setup is really _not_ special, I don't understand why we are 
> > OK with ignoring the standard practices.  Real enterprise distroes
> > are very careful never to break the naming of interfaces and they keep
> > the naming policy in user space.  Playing tricks in the kernel has every
> > chance of breaking existing user setups.  
> 
> Actually, Systemd folks said "naming policy is in userspace only because
> kernel can't get it right". Also there is no uniformity in userspace
> there are at least 5 systems trying to do network setup. And most of
> them depend on eth0 (yes still). Fixing userspace is impossible.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 21:18     ` Jakub Kicinski
@ 2017-12-19 21:29       ` Stephen Hemminger
  2017-12-19 21:55         ` Jakub Kicinski
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 21:29 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 13:18:16 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 12:44:25 -0800, Stephen Hemminger wrote:
> > On Tue, 19 Dec 2017 12:32:34 -0800
> > Jakub Kicinski <kubakici@wp.pl> wrote:
> >   
> > > On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:    
> > > > Rename the VF device to ethX_vf based on the ethX as the
> > > > synthetic device.  This eliminates the need for delay on setup,
> > > > and the PCI (udev based) naming is not reproducible on Hyper-V
> > > > anyway. The name of the VF does not matter since all control
> > > > operations take place the primary device. It does make the
> > > > user experience better to associate the names.
> > > > 
> > > > Based on feedback from all.systems.go talk.
> > > > The downside is that it requires exporting a symbol from netdev
> > > > core which makes it harder to backport.
> > > > 
> > > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>      
> > > 
> > > Why do you have to name the devices in the kernel space in the first
> > > place? :/  Why don't upstream the correct change to biosdevname like
> > > hardware vendors do?    
> > 
> > biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).  
> 
> Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
> back to information from the PCI VPD, which could be populated by 
> the hypervisor.

VPD never had any useful standard are info.
The rules used by udev come off sysfs, see:
  https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/

> 
> > I assume you mean the modern application is udev, and it works but the name is meaningless
> > because it based of synthetic PCI information. The PCI host adapter is simulated
> > for pass through devices. Names like enp12s0.
> > 
> > Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
> > network device with same mac address. It is best to have the relationship
> > shown in the name.  
> 
> How about we make the VF drivers expose "vf" as phys_port_name?
> Then systemd/udev should glue that onto the name regardless of
> how the VF is used.

One of the goals was not to modify in any way other drivers (like VF).

> 
> > > Your VF setup is really _not_ special, I don't understand why we are 
> > > OK with ignoring the standard practices.  Real enterprise distroes
> > > are very careful never to break the naming of interfaces and they keep
> > > the naming policy in user space.  Playing tricks in the kernel has every
> > > chance of breaking existing user setups.    
> > 
> > Actually, Systemd folks said "naming policy is in userspace only because
> > kernel can't get it right". Also there is no uniformity in userspace
> > there are at least 5 systems trying to do network setup. And most of
> > them depend on eth0 (yes still). Fixing userspace is impossible.  

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 21:29       ` Stephen Hemminger
@ 2017-12-19 21:55         ` Jakub Kicinski
  2017-12-19 22:06           ` Stephen Hemminger
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 21:55 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 13:29:49 -0800, Stephen Hemminger wrote:
> > > biosdevname is dead, gone and wouldn't work on Azure (it dumpster
> > > dives in /dev/mem).    
> > 
> > Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
> > back to information from the PCI VPD, which could be populated by 
> > the hypervisor.  
> 
> VPD never had any useful standard are info.
> The rules used by udev come off sysfs, see:
>   https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/

Yes, the current VPD info looks quite limited, although it is
extendable.

> > > I assume you mean the modern application is udev, and it works
> > > but the name is meaningless because it based of synthetic PCI
> > > information. The PCI host adapter is simulated for pass through
> > > devices. Names like enp12s0.
> > > 
> > > Since every passthrough VF device on Hyper-V/Azure has a matching
> > > synthetic network device with same mac address. It is best to
> > > have the relationship shown in the name.    
> > 
> > How about we make the VF drivers expose "vf" as phys_port_name?
> > Then systemd/udev should glue that onto the name regardless of
> > how the VF is used.  
> 
> One of the goals was not to modify in any way other drivers (like VF).

Why?  Do you have out-of-tree drivers you can't change or some such?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 21:55         ` Jakub Kicinski
@ 2017-12-19 22:06           ` Stephen Hemminger
  2017-12-19 22:24             ` Jakub Kicinski
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 22:06 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 13:55:29 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 13:29:49 -0800, Stephen Hemminger wrote:
> > > > biosdevname is dead, gone and wouldn't work on Azure (it dumpster
> > > > dives in /dev/mem).      
> > > 
> > > Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
> > > back to information from the PCI VPD, which could be populated by 
> > > the hypervisor.    
> > 
> > VPD never had any useful standard are info.
> > The rules used by udev come off sysfs, see:
> >   https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/  
> 
> Yes, the current VPD info looks quite limited, although it is
> extendable.
> 
> > > > I assume you mean the modern application is udev, and it works
> > > > but the name is meaningless because it based of synthetic PCI
> > > > information. The PCI host adapter is simulated for pass through
> > > > devices. Names like enp12s0.
> > > > 
> > > > Since every passthrough VF device on Hyper-V/Azure has a matching
> > > > synthetic network device with same mac address. It is best to
> > > > have the relationship shown in the name.      
> > > 
> > > How about we make the VF drivers expose "vf" as phys_port_name?
> > > Then systemd/udev should glue that onto the name regardless of
> > > how the VF is used.    
> > 
> > One of the goals was not to modify in any way other drivers (like VF).  
> 
> Why?  Do you have out-of-tree drivers you can't change or some such?

This needs to work on enterprise distributions; plus it is not good practice
to introduce random changes into partners like Mellanox drivers.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 22:06           ` Stephen Hemminger
@ 2017-12-19 22:24             ` Jakub Kicinski
  2017-12-19 22:51               ` Stephen Hemminger
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 22:24 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 14:06:59 -0800, Stephen Hemminger wrote:
> > > > > I assume you mean the modern application is udev, and it works
> > > > > but the name is meaningless because it based of synthetic PCI
> > > > > information. The PCI host adapter is simulated for pass through
> > > > > devices. Names like enp12s0.
> > > > > 
> > > > > Since every passthrough VF device on Hyper-V/Azure has a matching
> > > > > synthetic network device with same mac address. It is best to
> > > > > have the relationship shown in the name.        
> > > > 
> > > > How about we make the VF drivers expose "vf" as phys_port_name?
> > > > Then systemd/udev should glue that onto the name regardless of
> > > > how the VF is used.      
> > > 
> > > One of the goals was not to modify in any way other drivers (like VF).    
> > 
> > Why?  Do you have out-of-tree drivers you can't change or some such?  
> 
> This needs to work on enterprise distributions; plus it is not good
> practice to introduce random changes into partners like Mellanox
> drivers.

Are we talking about Linux or Windows kernel here?  I don't think
this requires hypervisor changes?  The notion of a "partner" and
changing drivers by people who are not employed by a vendor being 
bad practice sounds entirely foreign to me.

If we agree that marking VF interfaces is useful (and I think 
it is) then we should mark them always, not only when they are 
enslaved to a magic bond.  And the natural way of doing that 
seems to be phys_port_name.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 19:35 [RFC] hv_netvsc: automatically name slave VF network device Stephen Hemminger
  2017-12-19 20:32 ` Jakub Kicinski
@ 2017-12-19 22:44 ` Samudrala, Sridhar
  2017-12-19 22:50   ` Stephen Hemminger
  1 sibling, 1 reply; 15+ messages in thread
From: Samudrala, Sridhar @ 2017-12-19 22:44 UTC (permalink / raw)
  To: Stephen Hemminger, netdev; +Cc: Stephen Hemminger

On 12/19/2017 11:35 AM, Stephen Hemminger wrote:
> Rename the VF device to ethX_vf based on the ethX as the
> synthetic device.  This eliminates the need for delay on setup,
> and the PCI (udev based) naming is not reproducible on Hyper-V
> anyway. The name of the VF does not matter since all control
> operations take place the primary device. It does make the
> user experience better to associate the names.
>
> Based on feedback from all.systems.go talk.
> The downside is that it requires exporting a symbol from netdev
> core which makes it harder to backport.
>
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
>   drivers/net/hyperv/hyperv_net.h |  1 -
>   drivers/net/hyperv/netvsc_drv.c | 53 +++++++++++------------------------------
>   net/core/dev.c                  |  1 +
>   3 files changed, 15 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 0db3bd1ea06f..5013189123e6 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -762,7 +762,6 @@ struct net_device_context {
>   	/* State to manage the associated VF interface. */
>   	struct net_device __rcu *vf_netdev;
>   	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
> -	struct delayed_work vf_takeover;
>   
>   	/* 1: allocated, serial number is valid. 0: not allocated */
>   	u32 vf_alloc;
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index c5584c2d440e..8a0afe9a5064 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -49,7 +49,6 @@
>   #define RING_SIZE_MIN		64
>   
>   #define LINKCHANGE_INT (2 * HZ)
> -#define VF_TAKEOVER_INT (HZ / 10)
>   
>   static unsigned int ring_size __ro_after_init = 128;
>   module_param(ring_size, uint, S_IRUGO);
> @@ -1760,7 +1759,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
>   static int netvsc_vf_join(struct net_device *vf_netdev,
>   			  struct net_device *ndev)
>   {
> -	struct net_device_context *ndev_ctx = netdev_priv(ndev);
> +	char vf_name[IFNAMSIZ];
>   	int ret;
>   
>   	ret = netdev_rx_handler_register(vf_netdev,
> @@ -1783,23 +1782,14 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
>   	/* set slave flag before open to prevent IPv6 addrconf */
>   	vf_netdev->flags |= IFF_SLAVE;
>   
> -	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
> -
>   	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
>   
> -	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
> -	return 0;
> -
> -upper_link_failed:
> -	netdev_rx_handler_unregister(vf_netdev);
> -rx_handler_failed:
> -	return ret;
> -}
> -
> -static void __netvsc_vf_setup(struct net_device *ndev,
> -			      struct net_device *vf_netdev)
> -{
> -	int ret;
> +	/* set the name of VF device based on upper device name */
> +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> +	ret = dev_change_name(vf_netdev, vf_name);
> +	if (ret != 0)
> +		netdev_warn(vf_netdev,
> +			    "can not rename device: (%d)\n", ret);

It is possible that upper device name can change after this call.  I 
noticed this
when i tried this approach with virtio_net.

Also, what should happen if the upper device is unloaded? Should we rename
the VF name?


>   
>   	/* Align MTU of VF with master */
>   	ret = dev_set_mtu(vf_netdev, ndev->mtu);
> @@ -1807,34 +1797,21 @@ static void __netvsc_vf_setup(struct net_device *ndev,
>   		netdev_warn(vf_netdev,
>   			    "unable to change mtu to %u\n", ndev->mtu);
>   
> +	/* If upper device is already up, bring up slave */
>   	if (netif_running(ndev)) {
>   		ret = dev_open(vf_netdev);
>   		if (ret)
>   			netdev_warn(vf_netdev,
>   				    "unable to open: %d\n", ret);
>   	}
> -}
> -
> -/* Setup VF as slave of the synthetic device.
> - * Runs in workqueue to avoid recursion in netlink callbacks.
> - */
> -static void netvsc_vf_setup(struct work_struct *w)
> -{
> -	struct net_device_context *ndev_ctx
> -		= container_of(w, struct net_device_context, vf_takeover.work);
> -	struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
> -	struct net_device *vf_netdev;
> -
> -	if (!rtnl_trylock()) {
> -		schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
> -		return;
> -	}
>   
> -	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
> -	if (vf_netdev)
> -		__netvsc_vf_setup(ndev, vf_netdev);
> +	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
> +	return 0;
>   
> -	rtnl_unlock();
> + upper_link_failed:
> +	netdev_rx_handler_unregister(vf_netdev);
> + rx_handler_failed:
> +	return ret;
>   }
>   
>   static int netvsc_register_vf(struct net_device *vf_netdev)
> @@ -1904,7 +1881,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
>   		return NOTIFY_DONE;
>   
>   	net_device_ctx = netdev_priv(ndev);
> -	cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
>   
>   	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
>   
> @@ -1947,7 +1923,6 @@ static int netvsc_probe(struct hv_device *dev,
>   
>   	spin_lock_init(&net_device_ctx->lock);
>   	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
> -	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
>   
>   	net_device_ctx->vf_stats
>   		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index c7db39926769..f51358a90efa 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1238,6 +1238,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
>   
>   	return err;
>   }
> +EXPORT_SYMBOL(dev_change_name);
>   
>   /**
>    *	dev_set_alias - change ifalias of a device

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 22:44 ` Samudrala, Sridhar
@ 2017-12-19 22:50   ` Stephen Hemminger
  2017-12-19 23:20     ` Jakub Kicinski
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 22:50 UTC (permalink / raw)
  To: Samudrala, Sridhar; +Cc: netdev, Stephen Hemminger

On Tue, 19 Dec 2017 14:44:37 -0800
"Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:

>  -static void __netvsc_vf_setup(struct net_device *ndev,
> > -			      struct net_device *vf_netdev)
> > -{
> > -	int ret;
> > +	/* set the name of VF device based on upper device name */
> > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > +	ret = dev_change_name(vf_netdev, vf_name);
> > +	if (ret != 0)
> > +		netdev_warn(vf_netdev,
> > +			    "can not rename device: (%d)\n", ret);  
> 
> It is possible that upper device name can change after this call.  I 
> noticed this
> when i tried this approach with virtio_net.
> 
> Also, what should happen if the upper device is unloaded? Should we rename
> the VF name?

Yes upper device can change name. So sure, netdevice could trap that
in callback (it already has notifier) and rename VF. Will add that in V2.

If upper device is unloaded then it is already decoupled from the VF.
There is no good value to change it back to. The orignal name probably
has been reused by then.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 22:24             ` Jakub Kicinski
@ 2017-12-19 22:51               ` Stephen Hemminger
  0 siblings, 0 replies; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 22:51 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger, Jiri Pirko

On Tue, 19 Dec 2017 14:24:52 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 14:06:59 -0800, Stephen Hemminger wrote:
> > > > > > I assume you mean the modern application is udev, and it works
> > > > > > but the name is meaningless because it based of synthetic PCI
> > > > > > information. The PCI host adapter is simulated for pass through
> > > > > > devices. Names like enp12s0.
> > > > > > 
> > > > > > Since every passthrough VF device on Hyper-V/Azure has a matching
> > > > > > synthetic network device with same mac address. It is best to
> > > > > > have the relationship shown in the name.          
> > > > > 
> > > > > How about we make the VF drivers expose "vf" as phys_port_name?
> > > > > Then systemd/udev should glue that onto the name regardless of
> > > > > how the VF is used.        
> > > > 
> > > > One of the goals was not to modify in any way other drivers (like VF).      
> > > 
> > > Why?  Do you have out-of-tree drivers you can't change or some such?    
> > 
> > This needs to work on enterprise distributions; plus it is not good
> > practice to introduce random changes into partners like Mellanox
> > drivers.  
> 
> Are we talking about Linux or Windows kernel here?  I don't think
> this requires hypervisor changes?  The notion of a "partner" and
> changing drivers by people who are not employed by a vendor being 
> bad practice sounds entirely foreign to me.

Minor bug fixes sure, but changing semantics requires consultation.
Vendors like Intel and Mellanox have code bases that feed upstream.

> If we agree that marking VF interfaces is useful (and I think 
> it is) then we should mark them always, not only when they are 
> enslaved to a magic bond.  And the natural way of doing that 
> seems to be phys_port_name.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 22:50   ` Stephen Hemminger
@ 2017-12-19 23:20     ` Jakub Kicinski
  2017-12-19 23:44       ` Stephen Hemminger
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 23:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger

On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 14:44:37 -0800
> "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> 
> >  -static void __netvsc_vf_setup(struct net_device *ndev,  
> > > -			      struct net_device *vf_netdev)
> > > -{
> > > -	int ret;
> > > +	/* set the name of VF device based on upper device name */
> > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > +	if (ret != 0)
> > > +		netdev_warn(vf_netdev,
> > > +			    "can not rename device: (%d)\n", ret);    
> > 
> > It is possible that upper device name can change after this call.  I 
> > noticed this
> > when i tried this approach with virtio_net.
> > 
> > Also, what should happen if the upper device is unloaded? Should we rename
> > the VF name?  
> 
> Yes upper device can change name. So sure, netdevice could trap that
> in callback (it already has notifier) and rename VF. Will add that in V2.
> 
> If upper device is unloaded then it is already decoupled from the VF.
> There is no good value to change it back to. The orignal name probably
> has been reused by then.

Both of those issues would be solved by just exposing phys_port_name
from the VF driver, and letting systemd do its thing independent of 
magic bonds.

Reluctance to do driver work aside :/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 23:20     ` Jakub Kicinski
@ 2017-12-19 23:44       ` Stephen Hemminger
  2017-12-19 23:53         ` Jakub Kicinski
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2017-12-19 23:44 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger

On Tue, 19 Dec 2017 15:20:57 -0800
Jakub Kicinski <jakub.kicinski@netronome.com> wrote:

> On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:
> > On Tue, 19 Dec 2017 14:44:37 -0800
> > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> >   
> > >  -static void __netvsc_vf_setup(struct net_device *ndev,    
> > > > -			      struct net_device *vf_netdev)
> > > > -{
> > > > -	int ret;
> > > > +	/* set the name of VF device based on upper device name */
> > > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > > +	if (ret != 0)
> > > > +		netdev_warn(vf_netdev,
> > > > +			    "can not rename device: (%d)\n", ret);      
> > > 
> > > It is possible that upper device name can change after this call.  I 
> > > noticed this
> > > when i tried this approach with virtio_net.
> > > 
> > > Also, what should happen if the upper device is unloaded? Should we rename
> > > the VF name?    
> > 
> > Yes upper device can change name. So sure, netdevice could trap that
> > in callback (it already has notifier) and rename VF. Will add that in V2.
> > 
> > If upper device is unloaded then it is already decoupled from the VF.
> > There is no good value to change it back to. The orignal name probably
> > has been reused by then.  
> 
> Both of those issues would be solved by just exposing phys_port_name
> from the VF driver, and letting systemd do its thing independent of 
> magic bonds.
> 
> Reluctance to do driver work aside :/

The port name for Mellanox driver is already set in the driver as a numeric value.
It indicates which port is used.
This won't work.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 23:44       ` Stephen Hemminger
@ 2017-12-19 23:53         ` Jakub Kicinski
  2017-12-20  6:41           ` Jiri Pirko
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2017-12-19 23:53 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger

On Tue, 19 Dec 2017 15:44:05 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 15:20:57 -0800
> Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> 
> > On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:  
> > > On Tue, 19 Dec 2017 14:44:37 -0800
> > > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> > >     
> > > >  -static void __netvsc_vf_setup(struct net_device *ndev,      
> > > > > -			      struct net_device *vf_netdev)
> > > > > -{
> > > > > -	int ret;
> > > > > +	/* set the name of VF device based on upper device name */
> > > > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > > > +	if (ret != 0)
> > > > > +		netdev_warn(vf_netdev,
> > > > > +			    "can not rename device: (%d)\n", ret);        
> > > > 
> > > > It is possible that upper device name can change after this call.  I 
> > > > noticed this
> > > > when i tried this approach with virtio_net.
> > > > 
> > > > Also, what should happen if the upper device is unloaded? Should we rename
> > > > the VF name?      
> > > 
> > > Yes upper device can change name. So sure, netdevice could trap that
> > > in callback (it already has notifier) and rename VF. Will add that in V2.
> > > 
> > > If upper device is unloaded then it is already decoupled from the VF.
> > > There is no good value to change it back to. The orignal name probably
> > > has been reused by then.    
> > 
> > Both of those issues would be solved by just exposing phys_port_name
> > from the VF driver, and letting systemd do its thing independent of 
> > magic bonds.
> > 
> > Reluctance to do driver work aside :/  
> 
> The port name for Mellanox driver is already set in the driver as a numeric value.
> It indicates which port is used.
> This won't work.

You must be looking at representor ndos.  I don't think MLX NIC drivers
are implementing phys_port_name for normal netdevs at all today.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC] hv_netvsc: automatically name slave VF network device
  2017-12-19 23:53         ` Jakub Kicinski
@ 2017-12-20  6:41           ` Jiri Pirko
  0 siblings, 0 replies; 15+ messages in thread
From: Jiri Pirko @ 2017-12-20  6:41 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Stephen Hemminger, Samudrala, Sridhar, netdev, Stephen Hemminger

Wed, Dec 20, 2017 at 12:53:53AM CET, jakub.kicinski@netronome.com wrote:
>On Tue, 19 Dec 2017 15:44:05 -0800, Stephen Hemminger wrote:
>> On Tue, 19 Dec 2017 15:20:57 -0800
>> Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
>> 
>> > On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:  
>> > > On Tue, 19 Dec 2017 14:44:37 -0800
>> > > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
>> > >     
>> > > >  -static void __netvsc_vf_setup(struct net_device *ndev,      
>> > > > > -			      struct net_device *vf_netdev)
>> > > > > -{
>> > > > > -	int ret;
>> > > > > +	/* set the name of VF device based on upper device name */
>> > > > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
>> > > > > +	ret = dev_change_name(vf_netdev, vf_name);
>> > > > > +	if (ret != 0)
>> > > > > +		netdev_warn(vf_netdev,
>> > > > > +			    "can not rename device: (%d)\n", ret);        
>> > > > 
>> > > > It is possible that upper device name can change after this call.  I 
>> > > > noticed this
>> > > > when i tried this approach with virtio_net.
>> > > > 
>> > > > Also, what should happen if the upper device is unloaded? Should we rename
>> > > > the VF name?      
>> > > 
>> > > Yes upper device can change name. So sure, netdevice could trap that
>> > > in callback (it already has notifier) and rename VF. Will add that in V2.
>> > > 
>> > > If upper device is unloaded then it is already decoupled from the VF.
>> > > There is no good value to change it back to. The orignal name probably
>> > > has been reused by then.    
>> > 
>> > Both of those issues would be solved by just exposing phys_port_name
>> > from the VF driver, and letting systemd do its thing independent of 
>> > magic bonds.
>> > 
>> > Reluctance to do driver work aside :/  
>> 
>> The port name for Mellanox driver is already set in the driver as a numeric value.
>> It indicates which port is used.
>> This won't work.
>
>You must be looking at representor ndos.  I don't think MLX NIC drivers
>are implementing phys_port_name for normal netdevs at all today.

You are right Jakub. I plan to generate phys_port_name according to the
netdev flavours (vf,pf,qp,vfrep,pfrep,qprep,etc). That would resolve
this.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-12-20  6:41 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-19 19:35 [RFC] hv_netvsc: automatically name slave VF network device Stephen Hemminger
2017-12-19 20:32 ` Jakub Kicinski
2017-12-19 20:44   ` Stephen Hemminger
2017-12-19 21:18     ` Jakub Kicinski
2017-12-19 21:29       ` Stephen Hemminger
2017-12-19 21:55         ` Jakub Kicinski
2017-12-19 22:06           ` Stephen Hemminger
2017-12-19 22:24             ` Jakub Kicinski
2017-12-19 22:51               ` Stephen Hemminger
2017-12-19 22:44 ` Samudrala, Sridhar
2017-12-19 22:50   ` Stephen Hemminger
2017-12-19 23:20     ` Jakub Kicinski
2017-12-19 23:44       ` Stephen Hemminger
2017-12-19 23:53         ` Jakub Kicinski
2017-12-20  6:41           ` Jiri Pirko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.