Netdev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace
@ 2021-04-06  7:54 Andrei Vagin
  2021-04-06 18:31 ` Jakub Kicinski
  2021-04-08 13:02 ` Christian Brauner
  0 siblings, 2 replies; 3+ messages in thread
From: Andrei Vagin @ 2021-04-06  7:54 UTC (permalink / raw)
  To: David S. Miller, Jakub Kicinski, netdev
  Cc: Christian Brauner, Andrei Vagin, Alexander Mikhalitsyn

Currently, we can specify ifindex on link creation. This change allows
to specify ifindex when a device is moved to another network namespace.

Even now, a device ifindex can be changed if there is another device
with the same ifindex in the target namespace. So this change doesn't
introduce completely new behavior, it adds more control to the process.

CRIU users want to restore containers with pre-created network devices.
A user will provide network devices and instructions where they have to
be restored, then CRIU will restore network namespaces and move devices
into them. The problem is that devices have to be restored with the same
indexes that they have before C/R.

Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
---

v2: initialize new_ifindex to 0 in do_setlink.
v3: check that new_ifindex is positive.
v4: - use ifla_policy to validate IFLA_NEW_IFINDEX.
    - don't change the prototype of dev_change_net_namespace that is
      used in many places.

 include/linux/netdevice.h |  9 ++++++++-
 net/core/dev.c            | 26 ++++++++++++++++++--------
 net/core/rtnetlink.c      | 15 ++++++++++++---
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f57b70fc251f..5cbc950b34df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4026,7 +4026,14 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
 int dev_change_name(struct net_device *, const char *);
 int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
-int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+			       const char *pat, int new_ifindex);
+static inline
+int dev_change_net_namespace(struct net_device *dev, struct net *net,
+			     const char *pat)
+{
+	return __dev_change_net_namespace(dev, net, pat, 0);
+}
 int __dev_set_mtu(struct net_device *, int);
 int dev_validate_mtu(struct net_device *dev, int mtu,
 		     struct netlink_ext_ack *extack);
diff --git a/net/core/dev.c b/net/core/dev.c
index b4c67a5be606..33ff4a944109 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11062,11 +11062,13 @@ void unregister_netdev(struct net_device *dev)
 EXPORT_SYMBOL(unregister_netdev);
 
 /**
- *	dev_change_net_namespace - move device to different nethost namespace
+ *	__dev_change_net_namespace - move device to different nethost namespace
  *	@dev: device
  *	@net: network namespace
  *	@pat: If not NULL name pattern to try if the current device name
  *	      is already taken in the destination network namespace.
+ *	@new_ifindex: If not zero, specifies device index in the target
+ *	              namespace.
  *
  *	This function shuts down a device interface and moves it
  *	to a new network namespace. On success 0 is returned, on
@@ -11075,10 +11077,11 @@ EXPORT_SYMBOL(unregister_netdev);
  *	Callers must hold the rtnl semaphore.
  */
 
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+			       const char *pat, int new_ifindex)
 {
 	struct net *net_old = dev_net(dev);
-	int err, new_nsid, new_ifindex;
+	int err, new_nsid;
 
 	ASSERT_RTNL();
 
@@ -11109,6 +11112,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 			goto out;
 	}
 
+	/* Check that new_ifindex isn't used yet. */
+	err = -EBUSY;
+	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
+		goto out;
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */
@@ -11136,10 +11144,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
 	/* If there is an ifindex conflict assign a new one */
-	if (__dev_get_by_index(net, dev->ifindex))
-		new_ifindex = dev_new_index(net);
-	else
-		new_ifindex = dev->ifindex;
+	if (!new_ifindex) {
+		if (__dev_get_by_index(net, dev->ifindex))
+			new_ifindex = dev_new_index(net);
+		else
+			new_ifindex = dev->ifindex;
+	}
 
 	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
 			    new_ifindex);
@@ -11192,7 +11202,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 out:
 	return err;
 }
-EXPORT_SYMBOL_GPL(dev_change_net_namespace);
+EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
 
 static int dev_cpu_dead(unsigned int oldcpu)
 {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1bdcb33fb561..9f1f55785a6f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1877,6 +1877,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = ALTIFNAMSIZ - 1 },
 	[IFLA_PERM_ADDRESS]	= { .type = NLA_REJECT },
 	[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
+	[IFLA_NEW_IFINDEX]	= NLA_POLICY_MIN(NLA_S32, 1),
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2603,14 +2604,22 @@ static int do_setlink(const struct sk_buff *skb,
 		return err;
 
 	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
-		struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
-							    tb, CAP_NET_ADMIN);
+		struct net *net;
+		int new_ifindex;
+
+		net = rtnl_link_get_net_capable(skb, dev_net(dev),
+						tb, CAP_NET_ADMIN);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
 			goto errout;
 		}
 
-		err = dev_change_net_namespace(dev, net, ifname);
+		if (tb[IFLA_NEW_IFINDEX])
+			new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]);
+		else
+			new_ifindex = 0;
+
+		err = __dev_change_net_namespace(dev, net, ifname, new_ifindex);
 		put_net(net);
 		if (err)
 			goto errout;
-- 
2.29.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace
  2021-04-06  7:54 [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace Andrei Vagin
@ 2021-04-06 18:31 ` Jakub Kicinski
  2021-04-08 13:02 ` Christian Brauner
  1 sibling, 0 replies; 3+ messages in thread
From: Jakub Kicinski @ 2021-04-06 18:31 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: David S. Miller, netdev, Christian Brauner, Alexander Mikhalitsyn

On Tue,  6 Apr 2021 00:54:48 -0700 Andrei Vagin wrote:
> v3: check that new_ifindex is positive.
> v4: - use ifla_policy to validate IFLA_NEW_IFINDEX.
>     - don't change the prototype of dev_change_net_namespace that is
>       used in many places.

I'm afraid v3 got merged before I sent my review, would you mind
rebasing on net-next and resending an incremental change?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace
  2021-04-06  7:54 [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace Andrei Vagin
  2021-04-06 18:31 ` Jakub Kicinski
@ 2021-04-08 13:02 ` Christian Brauner
  1 sibling, 0 replies; 3+ messages in thread
From: Christian Brauner @ 2021-04-08 13:02 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: David S. Miller, Jakub Kicinski, netdev, Alexander Mikhalitsyn

On Tue, Apr 06, 2021 at 12:54:48AM -0700, Andrei Vagin wrote:
> Currently, we can specify ifindex on link creation. This change allows
> to specify ifindex when a device is moved to another network namespace.
> 
> Even now, a device ifindex can be changed if there is another device
> with the same ifindex in the target namespace. So this change doesn't
> introduce completely new behavior, it adds more control to the process.
> 
> CRIU users want to restore containers with pre-created network devices.
> A user will provide network devices and instructions where they have to
> be restored, then CRIU will restore network namespaces and move devices
> into them. The problem is that devices have to be restored with the same
> indexes that they have before C/R.
> 
> Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
> Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
> Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
> Signed-off-by: Andrei Vagin <avagin@gmail.com>
> ---

I've compiled a kernel with this patch and was able to successfully dump
and restore a container which relies on pre-created network devices that
need to be moved into a target network namespace with a specific
ifindex. The pull-request making use of the feature in this patch can be
found here:

https://github.com/checkpoint-restore/criu/pull/1432

Thanks!
(Always happy with the fast and painless processes in net[-next]!)
Christian

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-06  7:54 [PATCH net-next v4] net: Allow to specify ifindex when device is moved to another namespace Andrei Vagin
2021-04-06 18:31 ` Jakub Kicinski
2021-04-08 13:02 ` Christian Brauner

Netdev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netdev/0 netdev/git/0.git
	git clone --mirror https://lore.kernel.org/netdev/1 netdev/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netdev netdev/ https://lore.kernel.org/netdev \
		netdev@vger.kernel.org
	public-inbox-index netdev

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netdev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git