From: Jiri Pirko <jiri@resnulli.us>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, idosch@mellanox.com, dsahern@gmail.com,
jakub.kicinski@netronome.com, tariqt@mellanox.com,
saeedm@mellanox.com, kuznet@ms2.inr.ac.ru,
yoshfuji@linux-ipv6.org, shuah@kernel.org, mlxsw@mellanox.com
Subject: [patch net-next 14/15] net: devlink: allow to change namespaces during reload
Date: Sat, 14 Sep 2019 08:46:07 +0200 [thread overview]
Message-ID: <20190914064608.26799-15-jiri@resnulli.us> (raw)
In-Reply-To: <20190914064608.26799-1-jiri@resnulli.us>
From: Jiri Pirko <jiri@mellanox.com>
All devlink instances are created in init_net and stay there for a
lifetime. Allow user to be able to move devlink instances into
namespaces during devlink reload operation. That ensures proper
re-instantiation of driver objects, including netdevices.
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/main.c | 4 +
include/uapi/linux/devlink.h | 4 +
net/core/devlink.c | 155 ++++++++++++++++++++--
3 files changed, 155 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index ef3f3d06ff1e..989d0882aaa9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3942,6 +3942,10 @@ static int mlx4_devlink_reload_down(struct devlink *devlink,
struct mlx4_dev *dev = &priv->dev;
struct mlx4_dev_persistent *persist = dev->persist;
+ if (!net_eq(devlink_net(devlink), &init_net)) {
+ NL_SET_ERR_MSG_MOD(extack, "Namespace change is not supported");
+ return -EOPNOTSUPP;
+ }
if (persist->num_vfs)
mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
mlx4_restart_one_down(persist->pdev);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 580b7a2e40e1..b558ea88b766 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -421,6 +421,10 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */
+ DEVLINK_ATTR_NETNS_FD, /* u32 */
+ DEVLINK_ATTR_NETNS_PID, /* u32 */
+ DEVLINK_ATTR_NETNS_ID, /* u32 */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 362cbbcca225..2a5db95cce3c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -435,8 +435,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- devlink = devlink_get_from_info(info);
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ /* When devlink changes netns, it would not be found
+ * by devlink_get_from_info(). So try if it is stored first.
+ */
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+ devlink = info->user_ptr[0];
+ } else {
+ devlink = devlink_get_from_info(info);
+ WARN_ON(IS_ERR(devlink));
+ }
+ if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -2675,6 +2683,73 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd);
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *dest_net)
+{
+ struct devlink_param_item *param_item;
+
+ /* Userspace needs to be notified about devlink objects
+ * removed from original and entering new network namespace.
+ * The rest of the devlink objects are re-created during
+ * reload process so the notifications are generated separatelly.
+ */
+
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+
+ devlink_net_set(devlink, dest_net);
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+}
+
static bool devlink_reload_supported(struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
@@ -2695,9 +2770,27 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ err = devlink->ops->reload_down(devlink, extack);
+ if (err)
+ return err;
+
+ if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
+ devlink_reload_netns_change(devlink, dest_net);
+
+ err = devlink->ops->reload_up(devlink, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ return err;
+}
+
static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct net *dest_net = NULL;
int err;
if (!devlink_reload_supported(devlink))
@@ -2708,11 +2801,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
return err;
}
- err = devlink->ops->reload_down(devlink, info->extack);
- if (err)
- return err;
- err = devlink->ops->reload_up(devlink, info->extack);
- devlink_reload_failed_set(devlink, !!err);
+
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, devlink, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
return err;
}
@@ -5794,6 +5896,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -8061,9 +8166,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
return 0;
}
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+ struct devlink *devlink;
+ int err;
+
+ /* In case network namespace is getting destroyed, reload
+ * all devlink instances from this namespace into init_net.
+ */
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (net_eq(devlink_net(devlink), net)) {
+ if (WARN_ON(!devlink_reload_supported(devlink)))
+ continue;
+ err = devlink_reload(devlink, &init_net, NULL);
+ if (err)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ }
+ }
+ mutex_unlock(&devlink_mutex);
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+ .pre_exit = devlink_pernet_pre_exit,
+};
+
static int __init devlink_init(void)
{
- return genl_register_family(&devlink_nl_family);
+ int err;
+
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out;
+ err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+ WARN_ON(err);
+ return err;
}
subsys_initcall(devlink_init);
--
2.21.0
next prev parent reply other threads:[~2019-09-14 6:46 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-14 6:45 [patch net-next 00/15] devlink: allow devlink instances to change network namespace Jiri Pirko
2019-09-14 6:45 ` [patch net-next 01/15] netdevsim: change fib accounting and limitations to be per-device Jiri Pirko
2019-09-14 6:45 ` [patch net-next 02/15] net: fib_notifier: make FIB notifier per-netns Jiri Pirko
2019-09-15 8:06 ` Ido Schimmel
2019-09-15 9:37 ` Jiri Pirko
2019-09-15 20:05 ` David Ahern
2019-09-16 5:38 ` Jiri Pirko
2019-09-16 14:54 ` David Ahern
2019-09-14 6:45 ` [patch net-next 03/15] net: fib_notifier: propagate possible error during fib notifier registration Jiri Pirko
2019-09-15 8:17 ` Ido Schimmel
2019-09-15 9:41 ` Jiri Pirko
2019-09-14 6:45 ` [patch net-next 04/15] mlxsw: spectrum_router: Don't rely on missing extack to symbolize dump Jiri Pirko
2019-09-14 6:45 ` [patch net-next 05/15] net: fib_notifier: propagate extack down to the notifier block callback Jiri Pirko
2019-09-14 6:45 ` [patch net-next 06/15] net: devlink: export devlink net getter Jiri Pirko
2019-09-14 6:46 ` [patch net-next 07/15] mlxsw: spectrum: Take devlink net instead of init_net Jiri Pirko
2019-09-14 6:46 ` [patch net-next 08/15] mlxsw: Register port netdevices into net of core Jiri Pirko
2019-09-15 8:37 ` Ido Schimmel
2019-09-14 6:46 ` [patch net-next 09/15] mlxsw: Propagate extack down to register_fib_notifier() Jiri Pirko
2019-09-15 8:39 ` Ido Schimmel
2019-09-14 6:46 ` [patch net-next 10/15] netdevsim: add all ports in nsim_dev_create() and del them in destroy() Jiri Pirko
2019-09-14 6:46 ` [patch net-next 11/15] netdevsim: implement proper devlink reload Jiri Pirko
2019-09-14 6:46 ` [patch net-next 12/15] netdevsim: register port netdevices into net of device Jiri Pirko
2019-09-14 6:46 ` [patch net-next 13/15] netdevsim: take devlink net instead of init_net Jiri Pirko
2019-09-14 6:46 ` Jiri Pirko [this message]
2019-09-15 8:58 ` [patch net-next 14/15] net: devlink: allow to change namespaces during reload Ido Schimmel
2019-09-15 9:43 ` Jiri Pirko
2019-09-14 6:46 ` [patch net-next 15/15] selftests: netdevsim: add tests for devlink reload with resources Jiri Pirko
2019-09-14 6:57 ` [patch iproute2-next 1/2] devlink: introduce cmdline option to switch to a different namespace Jiri Pirko
2019-09-15 18:01 ` David Ahern
2019-09-14 6:57 ` [patch iproute2-next 2/2] devlink: extend reload command to add support for network namespace change Jiri Pirko
2019-09-15 7:16 ` Ido Schimmel
2019-09-15 9:44 ` Jiri Pirko
2019-09-16 7:01 ` [patch net-next 00/15] devlink: allow devlink instances to change network namespace David Miller
2019-09-16 7:09 ` Jiri Pirko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190914064608.26799-15-jiri@resnulli.us \
--to=jiri@resnulli.us \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=idosch@mellanox.com \
--cc=jakub.kicinski@netronome.com \
--cc=kuznet@ms2.inr.ac.ru \
--cc=mlxsw@mellanox.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@mellanox.com \
--cc=shuah@kernel.org \
--cc=tariqt@mellanox.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).