All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] IB/ipoib: Enable pkey and device name decoupling
@ 2017-09-27  9:32 Yuval Shaia
       [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Yuval Shaia @ 2017-09-27  9:32 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	leon-DgEjT+Ai2ygdnm+yROfE0A, valex-VPRAkNaXOzVWk0Htik3J/w,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

The sysfs "create_child" interface creates pkey based child interface but
derives the name from parent device name and pkey value.
This makes administration difficult where pkey values can change but
policies encoded with device names do not.

We add ability to create a child interface with a user specified name and a
specified pkey with a new sysfs "create_named_child" interface (and also
add a corresponding "delete_named_child" interface).

We also add a new module api interface to query pkey from a netdevice so
any kernel users of pkey based child interfaces can query it - since with
device name decoupled from pkey, it can no longer be deduced from parsing
the device name by other kernel users.

Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 Documentation/infiniband/ipoib.txt        |  12 ++
 drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
 4 files changed, 272 insertions(+), 6 deletions(-)

diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 47c1dd9818f2..1db53c9b2906 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -21,6 +21,18 @@ Partitions and P_Keys
 
     echo 0x8001 > /sys/class/net/ib0/delete_child
 
+  Interfaces with a user chosen name can be created in a similar
+  manner with a different name and P_Key, by writing them into the
+  main interface's /sys/class/net/<intf name>/create_named_child
+  For example:
+     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
+
+   This will create an interfaces named epart2 with P_Key 0x8002 and
+   parent ib1. To remove a named subinterface, use the
+   "delete_named_child" file:
+
+     echo epart2 > /sys/class/net/ib1/delete_named_child
+
   The P_Key for any interface is given by the "pkey" file, and the
   main interface for a subinterface is in "parent."
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4a5c7a07a631..9d0010f9b324 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -589,6 +589,9 @@ void ipoib_event(struct ib_event_handler *handler,
 
 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
+int ipoib_named_vlan_add(struct net_device *pdev, unsigned short pkey,
+			 char *child_name_buf);
+int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf);
 
 int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 		     u16 pkey, int child_type);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bac95b509a9b..2bdd4055d69f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -34,6 +34,7 @@
 
 #include "ipoib.h"
 
+#include <linux/ctype.h>
 #include <linux/module.h>
 
 #include <linux/init.h>
@@ -136,6 +137,13 @@ static int ipoib_netdev_event(struct notifier_block *this,
 }
 #endif
 
+/*
+ * PKEY_HEXSTRING_MAXWIDTH - number of hex
+ *   digits needed to represent max width of
+ *   pkey value.
+ */
+#define PKEY_HEXSTRING_MAXWIDTH 4
+
 int ipoib_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -2111,6 +2119,121 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
 	return 0;
 }
 
+/*
+ * Check if a buffer has name of the format
+ *
+ * <network-device-name>.<4hexcharacters>
+ * e.g. ib1.8004 etc.
+ *
+ * Such names are generated by create_child() by
+ * concatenating parent device with 16-bit pkey
+ * in hex, and disallowed from usage with
+ * create_named_child() interface.
+ *
+ */
+static bool ipoib_disallowed_named_child_namespace(const char *buf)
+{
+	char localbuf[IFNAMSIZ];
+	char *dotp = NULL;
+	char *buf_before_dot = NULL;
+	char *buf_after_dot = NULL;
+	unsigned int ii;
+
+	memcpy(localbuf, buf, IFNAMSIZ);
+	localbuf[IFNAMSIZ-1] = '\0'; /* paranoia! */
+
+	dotp = strnchr(localbuf, IFNAMSIZ, '.');
+	/* no dot or dot at end! */
+	if (dotp == NULL || dotp == localbuf+IFNAMSIZ-2)
+		return false;
+
+	*dotp = '\0';		/* split buffer at "dot"  */
+	buf_before_dot = localbuf;
+	buf_after_dot = dotp + 1;
+
+	/*
+	 * Check if buf_after_dot is hexstring of width
+	 * that could be a pkey!
+	 */
+	if (strlen(buf_after_dot) != PKEY_HEXSTRING_MAXWIDTH)
+		return false;
+
+	for (ii = 0; ii < PKEY_HEXSTRING_MAXWIDTH; ii++) {
+		if (!isxdigit(buf_after_dot[ii]))
+			return false;
+	}
+
+	/*
+	 * (1) buf_after_dot check above makes it valid hexdigit .XXXX format
+	 *
+	 * Now verify if buf_before_dot is a valid net device name -
+	 * (if it is not, then we are not in disallowed namespace)
+	 */
+	if (__dev_get_by_name(&init_net, buf_before_dot) == NULL)
+		return false;
+
+	/*
+	 * (2) buf_before_dot is valid net device name
+	 *    - reserved namespace is being used!
+	 *
+	 * Note: No check on netdev->type to be ARPHRD_INFINIBAND etc
+	 *       We implicitly treat even misleading names such as eth1.XXXX
+	 *       (ethernet device prefix) for child interface name of an
+	 *       infiniband device as intrusion of reserved namespace!
+	 */
+	return true;
+}
+
+static int parse_named_child(struct device *dev, const char *buf,
+			     char *child_name_buf, int *pkeyp)
+{
+	int ret;
+	struct ipoib_dev_priv *priv = ipoib_priv(to_net_dev(dev));
+
+	if (pkeyp)
+		*pkeyp = -1;
+
+	/*
+	 * First parameter is child interface name, after that
+	 * 'pkey' is required if we were passed a pkey buffer
+	 * (Note: From create_named_child, we are passed a pkey
+	 * buffer to parse input, from delete_named_child we are
+	 * not!)
+	 * Note: IFNAMSIZ is 16, allowing for tail null
+	 * we only scan 15 characters for name.
+	 */
+	if (pkeyp) {
+		ret = sscanf(buf, "%15s %i", child_name_buf, pkeyp);
+		if (ret != 2)
+			return -EINVAL;
+	} else {
+		ret = sscanf(buf, "%15s", child_name_buf);
+		if (ret != 1)
+			return -EINVAL;
+	}
+
+	if (strlen(child_name_buf) <= 0 || !dev_valid_name(child_name_buf))
+		return -EINVAL;
+
+	if (pkeyp && (*pkeyp <= 0 || *pkeyp > 0xffff || *pkeyp == 0x8000))
+		return -EINVAL;
+
+	if (ipoib_disallowed_named_child_namespace(child_name_buf)) {
+		pr_warn("child name %s not allowed  to be used with create_named_child as it uses <network-device-name>.XXXX format reserved for create_child/delete_child interfaces!\n",
+			child_name_buf);
+		return -EINVAL;
+	}
+
+	if (pkeyp)
+		ipoib_dbg(priv, "%s inp %s out child_name_buf %s, pkey %04x\n",
+			  __func__, buf, child_name_buf, *pkeyp);
+	else
+		ipoib_dbg(priv, "%s inp %s out child_name_buf %s\n", __func__,
+			  buf, child_name_buf);
+	return 0;
+}
+
+
 static ssize_t create_child(struct device *dev,
 			    struct device_attribute *attr,
 			    const char *buf, size_t count)
@@ -2156,6 +2279,44 @@ static ssize_t delete_child(struct device *dev,
 }
 static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child);
 
+static ssize_t create_named_child(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	int pkey;
+	char child_name[IFNAMSIZ];
+	int ret = 0;
+
+	child_name[0] = '\0';
+
+	if (parse_named_child(dev, buf, child_name, &pkey))
+		return -EINVAL;
+
+	ret = ipoib_named_vlan_add(to_net_dev(dev), pkey, child_name);
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(create_named_child, S_IWUSR, NULL, create_named_child);
+
+static ssize_t delete_named_child(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	char child_name[IFNAMSIZ];
+	int ret = 0;
+
+	child_name[0] = '\0';
+
+	if (parse_named_child(dev, buf, child_name, NULL))
+		return -EINVAL;
+
+	ret = ipoib_named_vlan_delete(to_net_dev(dev), child_name);
+
+	return ret ? ret : count;
+
+}
+static DEVICE_ATTR(delete_named_child, S_IWUSR, NULL, delete_named_child);
+
+
 int ipoib_add_pkey_attr(struct net_device *dev)
 {
 	return device_create_file(&dev->dev, &dev_attr_pkey);
@@ -2263,6 +2424,11 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto sysfs_failed;
 	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
 		goto sysfs_failed;
+	if (device_create_file(&priv->dev->dev, &dev_attr_create_named_child))
+		goto sysfs_failed;
+	if (device_create_file(&priv->dev->dev, &dev_attr_delete_named_child))
+		goto sysfs_failed;
+
 
 	return priv->dev;
 
@@ -2367,6 +2533,27 @@ static struct notifier_block ipoib_netdev_notifier = {
 };
 #endif
 
+int
+ipoib_get_netdev_pkey(struct net_device *dev, u16 *pkey)
+{
+	struct ipoib_dev_priv *priv;
+
+	if (dev->type != ARPHRD_INFINIBAND)
+		return -EINVAL;
+
+	/* only for ipoib net devices! */
+	if ((dev->netdev_ops != &ipoib_netdev_ops_pf) &&
+	    (dev->netdev_ops != &ipoib_netdev_ops_vf))
+		return -EINVAL;
+
+	priv = ipoib_priv(dev);
+
+	*pkey = priv->pkey;
+
+	return 0;
+}
+EXPORT_SYMBOL(ipoib_get_netdev_pkey);
+
 static int __init ipoib_init_module(void)
 {
 	int ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 9927cd6b7082..f5ae55f4f845 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -115,7 +115,9 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 	return result;
 }
 
-int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
+int ipoib_vlan_add_common(struct net_device *pdev,
+			  unsigned short pkey,
+			  char *child_name_buf)
 {
 	struct ipoib_dev_priv *ppriv, *priv;
 	char intf_name[IFNAMSIZ];
@@ -130,8 +132,21 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
 		return -EPERM;
 
-	snprintf(intf_name, sizeof intf_name, "%s.%04x",
-		 ppriv->dev->name, pkey);
+	if (child_name_buf == NULL) {
+		/*
+		 * If child name is not provided, we generated
+		 * one using name of parent and pkey.
+		 */
+		snprintf(intf_name, sizeof(intf_name), "%s.%04x",
+			 ppriv->dev->name, pkey);
+	} else {
+		/*
+		 * Note: Duplicate intf_name will be detected later in the code
+		 * by register_netdevice() (inside __ipoib_vlan_add() call
+		 * below) returning EEXIST!
+		 */
+		strncpy(intf_name, child_name_buf, IFNAMSIZ);
+	}
 
 	if (!mutex_trylock(&ppriv->sysfs_mutex))
 		return restart_syscall();
@@ -183,10 +198,27 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	return result;
 }
 
-int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
+int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
+{
+	return ipoib_vlan_add_common(pdev, pkey, NULL);
+}
+
+int ipoib_named_vlan_add(struct net_device *pdev,
+			 unsigned short pkey,
+			 char *child_name_buf)
+{
+	return ipoib_vlan_add_common(pdev, pkey, child_name_buf);
+}
+
+int ipoib_vlan_delete_common(struct net_device *pdev,
+			     unsigned short pkey,
+			     char *child_name_buf)
 {
 	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
 	struct net_device *dev = NULL;
+	char gen_intf_name[IFNAMSIZ];
+
+	gen_intf_name[0] = '\0'; /* initialize - paranoia! */
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -205,9 +237,30 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	}
 
 	down_write(&ppriv->vlan_rwsem);
+	if (child_name_buf == NULL && ppriv->dev) {
+		/*
+		 * If child name is not provided, we generate the
+		 * expected one using name of parent and pkey
+		 * and use it in addition to pkey value
+		 * (other children with same pkey may exist that have
+		 * created by create_named_child() - we do not allow
+		 * delete_child() to delete them - delete_named_child()
+		 * has to be used!)
+		 */
+		snprintf(gen_intf_name, sizeof(gen_intf_name),
+			 "%s.%04x", ppriv->dev->name, pkey);
+	}
 	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
-		if (priv->pkey == pkey &&
-		    priv->child_type == IPOIB_LEGACY_CHILD) {
+		if ((priv->child_type == IPOIB_LEGACY_CHILD) &&
+		    /* user named child (match by name) OR */
+		    ((child_name_buf && priv->dev &&
+		      !strcmp(child_name_buf, priv->dev->name)) ||
+		     /*
+		      * OR classic (devname.hexpkey generated name) child
+		      * (match by pkey and generated name)
+		      */
+		     (!child_name_buf && priv->pkey == pkey &&
+		      priv->dev && !strcmp(gen_intf_name, priv->dev->name)))) {
 			list_del(&priv->list);
 			dev = priv->dev;
 			break;
@@ -231,3 +284,14 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	return -ENODEV;
 }
+
+int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
+{
+
+	return ipoib_vlan_delete_common(pdev, pkey, NULL);
+}
+
+int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf)
+{
+	return ipoib_vlan_delete_common(pdev, 0, child_name_buf);
+}
-- 
2.13.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2017-09-27 14:14   ` Doug Ledford
  2017-09-27 15:01   ` Leon Romanovsky
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 23+ messages in thread
From: Doug Ledford @ 2017-09-27 14:14 UTC (permalink / raw)
  To: Yuval Shaia, sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	leon-DgEjT+Ai2ygdnm+yROfE0A, valex-VPRAkNaXOzVWk0Htik3J/w,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Wed, 2017-09-27 at 12:32 +0300, Yuval Shaia wrote:
> +       /*
> +        * (1) buf_after_dot check above makes it valid hexdigit
> .XXXX format
> +        *
> +        * Now verify if buf_before_dot is a valid net device name -
> +        * (if it is not, then we are not in disallowed namespace)
> +        */
> +       if (__dev_get_by_name(&init_net, buf_before_dot) == NULL)
> +               return false;

This is wrong.  We don't use &init_net in ipoib any more, we are
namespace aware, so your patch must be namespace aware too.

-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
    GPG KeyID: B826A3330E572FDD
    Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  2017-09-27 14:14   ` Doug Ledford
@ 2017-09-27 15:01   ` Leon Romanovsky
       [not found]     ` <20170927150140.GF2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  2017-09-27 17:20   ` Mukesh Kacker
  2017-09-28 16:34   ` Jason Gunthorpe
  3 siblings, 1 reply; 23+ messages in thread
From: Leon Romanovsky @ 2017-09-27 15:01 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 2506 bytes --]

On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> The sysfs "create_child" interface creates pkey based child interface but
> derives the name from parent device name and pkey value.
> This makes administration difficult where pkey values can change but
> policies encoded with device names do not.
>
> We add ability to create a child interface with a user specified name and a
> specified pkey with a new sysfs "create_named_child" interface (and also
> add a corresponding "delete_named_child" interface).
>
> We also add a new module api interface to query pkey from a netdevice so
> any kernel users of pkey based child interfaces can query it - since with
> device name decoupled from pkey, it can no longer be deduced from parsing
> the device name by other kernel users.
>
> Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>  Documentation/infiniband/ipoib.txt        |  12 ++
>  drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
>  drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
>  drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
>  4 files changed, 272 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
> index 47c1dd9818f2..1db53c9b2906 100644
> --- a/Documentation/infiniband/ipoib.txt
> +++ b/Documentation/infiniband/ipoib.txt
> @@ -21,6 +21,18 @@ Partitions and P_Keys
>
>      echo 0x8001 > /sys/class/net/ib0/delete_child
>
> +  Interfaces with a user chosen name can be created in a similar
> +  manner with a different name and P_Key, by writing them into the
> +  main interface's /sys/class/net/<intf name>/create_named_child
> +  For example:
> +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
> +
> +   This will create an interfaces named epart2 with P_Key 0x8002 and
> +   parent ib1. To remove a named subinterface, use the
> +   "delete_named_child" file:
> +
> +     echo epart2 > /sys/class/net/ib1/delete_named_child

I doubt that delete_named_child is actually needed. You can use delete_child
on the pkey, which you used to create named child.

Maybe better to add support to rename child instead of introducing named
child concept?

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  2017-09-27 14:14   ` Doug Ledford
  2017-09-27 15:01   ` Leon Romanovsky
@ 2017-09-27 17:20   ` Mukesh Kacker
       [not found]     ` <78f7983f-dc6b-a42a-0b29-adb69777fa75-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  2017-09-28 16:34   ` Jason Gunthorpe
  3 siblings, 1 reply; 23+ messages in thread
From: Mukesh Kacker @ 2017-09-27 17:20 UTC (permalink / raw)
  To: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	leon-DgEjT+Ai2ygdnm+yROfE0A, valex-VPRAkNaXOzVWk0Htik3J/w,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

Please do retain original author name from UEK4 and that should be me! 
:-)  [ can be fixed by editing with "git commit --amend 
--author="<string>" ]

-Mukesh Kacker


On 09/27/2017 02:32 AM, Yuval Shaia wrote:
> The sysfs "create_child" interface creates pkey based child interface but
> derives the name from parent device name and pkey value.
> This makes administration difficult where pkey values can change but
> policies encoded with device names do not.
> 
> We add ability to create a child interface with a user specified name and a
> specified pkey with a new sysfs "create_named_child" interface (and also
> add a corresponding "delete_named_child" interface).
> 
> We also add a new module api interface to query pkey from a netdevice so
> any kernel users of pkey based child interfaces can query it - since with
> device name decoupled from pkey, it can no longer be deduced from parsing
> the device name by other kernel users.
> 
> Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
>   Documentation/infiniband/ipoib.txt        |  12 ++
>   drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
>   drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
>   drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
>   4 files changed, 272 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
> index 47c1dd9818f2..1db53c9b2906 100644
> --- a/Documentation/infiniband/ipoib.txt
> +++ b/Documentation/infiniband/ipoib.txt
> @@ -21,6 +21,18 @@ Partitions and P_Keys
>   
>       echo 0x8001 > /sys/class/net/ib0/delete_child
>   
> +  Interfaces with a user chosen name can be created in a similar
> +  manner with a different name and P_Key, by writing them into the
> +  main interface's /sys/class/net/<intf name>/create_named_child
> +  For example:
> +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
> +
> +   This will create an interfaces named epart2 with P_Key 0x8002 and
> +   parent ib1. To remove a named subinterface, use the
> +   "delete_named_child" file:
> +
> +     echo epart2 > /sys/class/net/ib1/delete_named_child
> +
>     The P_Key for any interface is given by the "pkey" file, and the
>     main interface for a subinterface is in "parent."
>   
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 4a5c7a07a631..9d0010f9b324 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -589,6 +589,9 @@ void ipoib_event(struct ib_event_handler *handler,
>   
>   int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
>   int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
> +int ipoib_named_vlan_add(struct net_device *pdev, unsigned short pkey,
> +			 char *child_name_buf);
> +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf);
>   
>   int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
>   		     u16 pkey, int child_type);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index bac95b509a9b..2bdd4055d69f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -34,6 +34,7 @@
>   
>   #include "ipoib.h"
>   
> +#include <linux/ctype.h>
>   #include <linux/module.h>
>   
>   #include <linux/init.h>
> @@ -136,6 +137,13 @@ static int ipoib_netdev_event(struct notifier_block *this,
>   }
>   #endif
>   
> +/*
> + * PKEY_HEXSTRING_MAXWIDTH - number of hex
> + *   digits needed to represent max width of
> + *   pkey value.
> + */
> +#define PKEY_HEXSTRING_MAXWIDTH 4
> +
>   int ipoib_open(struct net_device *dev)
>   {
>   	struct ipoib_dev_priv *priv = ipoib_priv(dev);
> @@ -2111,6 +2119,121 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
>   	return 0;
>   }
>   
> +/*
> + * Check if a buffer has name of the format
> + *
> + * <network-device-name>.<4hexcharacters>
> + * e.g. ib1.8004 etc.
> + *
> + * Such names are generated by create_child() by
> + * concatenating parent device with 16-bit pkey
> + * in hex, and disallowed from usage with
> + * create_named_child() interface.
> + *
> + */
> +static bool ipoib_disallowed_named_child_namespace(const char *buf)
> +{
> +	char localbuf[IFNAMSIZ];
> +	char *dotp = NULL;
> +	char *buf_before_dot = NULL;
> +	char *buf_after_dot = NULL;
> +	unsigned int ii;
> +
> +	memcpy(localbuf, buf, IFNAMSIZ);
> +	localbuf[IFNAMSIZ-1] = '\0'; /* paranoia! */
> +
> +	dotp = strnchr(localbuf, IFNAMSIZ, '.');
> +	/* no dot or dot at end! */
> +	if (dotp == NULL || dotp == localbuf+IFNAMSIZ-2)
> +		return false;
> +
> +	*dotp = '\0';		/* split buffer at "dot"  */
> +	buf_before_dot = localbuf;
> +	buf_after_dot = dotp + 1;
> +
> +	/*
> +	 * Check if buf_after_dot is hexstring of width
> +	 * that could be a pkey!
> +	 */
> +	if (strlen(buf_after_dot) != PKEY_HEXSTRING_MAXWIDTH)
> +		return false;
> +
> +	for (ii = 0; ii < PKEY_HEXSTRING_MAXWIDTH; ii++) {
> +		if (!isxdigit(buf_after_dot[ii]))
> +			return false;
> +	}
> +
> +	/*
> +	 * (1) buf_after_dot check above makes it valid hexdigit .XXXX format
> +	 *
> +	 * Now verify if buf_before_dot is a valid net device name -
> +	 * (if it is not, then we are not in disallowed namespace)
> +	 */
> +	if (__dev_get_by_name(&init_net, buf_before_dot) == NULL)
> +		return false;
> +
> +	/*
> +	 * (2) buf_before_dot is valid net device name
> +	 *    - reserved namespace is being used!
> +	 *
> +	 * Note: No check on netdev->type to be ARPHRD_INFINIBAND etc
> +	 *       We implicitly treat even misleading names such as eth1.XXXX
> +	 *       (ethernet device prefix) for child interface name of an
> +	 *       infiniband device as intrusion of reserved namespace!
> +	 */
> +	return true;
> +}
> +
> +static int parse_named_child(struct device *dev, const char *buf,
> +			     char *child_name_buf, int *pkeyp)
> +{
> +	int ret;
> +	struct ipoib_dev_priv *priv = ipoib_priv(to_net_dev(dev));
> +
> +	if (pkeyp)
> +		*pkeyp = -1;
> +
> +	/*
> +	 * First parameter is child interface name, after that
> +	 * 'pkey' is required if we were passed a pkey buffer
> +	 * (Note: From create_named_child, we are passed a pkey
> +	 * buffer to parse input, from delete_named_child we are
> +	 * not!)
> +	 * Note: IFNAMSIZ is 16, allowing for tail null
> +	 * we only scan 15 characters for name.
> +	 */
> +	if (pkeyp) {
> +		ret = sscanf(buf, "%15s %i", child_name_buf, pkeyp);
> +		if (ret != 2)
> +			return -EINVAL;
> +	} else {
> +		ret = sscanf(buf, "%15s", child_name_buf);
> +		if (ret != 1)
> +			return -EINVAL;
> +	}
> +
> +	if (strlen(child_name_buf) <= 0 || !dev_valid_name(child_name_buf))
> +		return -EINVAL;
> +
> +	if (pkeyp && (*pkeyp <= 0 || *pkeyp > 0xffff || *pkeyp == 0x8000))
> +		return -EINVAL;
> +
> +	if (ipoib_disallowed_named_child_namespace(child_name_buf)) {
> +		pr_warn("child name %s not allowed  to be used with create_named_child as it uses <network-device-name>.XXXX format reserved for create_child/delete_child interfaces!\n",
> +			child_name_buf);
> +		return -EINVAL;
> +	}
> +
> +	if (pkeyp)
> +		ipoib_dbg(priv, "%s inp %s out child_name_buf %s, pkey %04x\n",
> +			  __func__, buf, child_name_buf, *pkeyp);
> +	else
> +		ipoib_dbg(priv, "%s inp %s out child_name_buf %s\n", __func__,
> +			  buf, child_name_buf);
> +	return 0;
> +}
> +
> +
>   static ssize_t create_child(struct device *dev,
>   			    struct device_attribute *attr,
>   			    const char *buf, size_t count)
> @@ -2156,6 +2279,44 @@ static ssize_t delete_child(struct device *dev,
>   }
>   static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child);
>   
> +static ssize_t create_named_child(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	int pkey;
> +	char child_name[IFNAMSIZ];
> +	int ret = 0;
> +
> +	child_name[0] = '\0';
> +
> +	if (parse_named_child(dev, buf, child_name, &pkey))
> +		return -EINVAL;
> +
> +	ret = ipoib_named_vlan_add(to_net_dev(dev), pkey, child_name);
> +	return ret ? ret : count;
> +}
> +static DEVICE_ATTR(create_named_child, S_IWUSR, NULL, create_named_child);
> +
> +static ssize_t delete_named_child(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	char child_name[IFNAMSIZ];
> +	int ret = 0;
> +
> +	child_name[0] = '\0';
> +
> +	if (parse_named_child(dev, buf, child_name, NULL))
> +		return -EINVAL;
> +
> +	ret = ipoib_named_vlan_delete(to_net_dev(dev), child_name);
> +
> +	return ret ? ret : count;
> +
> +}
> +static DEVICE_ATTR(delete_named_child, S_IWUSR, NULL, delete_named_child);
> +
> +
>   int ipoib_add_pkey_attr(struct net_device *dev)
>   {
>   	return device_create_file(&dev->dev, &dev_attr_pkey);
> @@ -2263,6 +2424,11 @@ static struct net_device *ipoib_add_port(const char *format,
>   		goto sysfs_failed;
>   	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
>   		goto sysfs_failed;
> +	if (device_create_file(&priv->dev->dev, &dev_attr_create_named_child))
> +		goto sysfs_failed;
> +	if (device_create_file(&priv->dev->dev, &dev_attr_delete_named_child))
> +		goto sysfs_failed;
> +
>   
>   	return priv->dev;
>   
> @@ -2367,6 +2533,27 @@ static struct notifier_block ipoib_netdev_notifier = {
>   };
>   #endif
>   
> +int
> +ipoib_get_netdev_pkey(struct net_device *dev, u16 *pkey)
> +{
> +	struct ipoib_dev_priv *priv;
> +
> +	if (dev->type != ARPHRD_INFINIBAND)
> +		return -EINVAL;
> +
> +	/* only for ipoib net devices! */
> +	if ((dev->netdev_ops != &ipoib_netdev_ops_pf) &&
> +	    (dev->netdev_ops != &ipoib_netdev_ops_vf))
> +		return -EINVAL;
> +
> +	priv = ipoib_priv(dev);
> +
> +	*pkey = priv->pkey;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(ipoib_get_netdev_pkey);
> +
>   static int __init ipoib_init_module(void)
>   {
>   	int ret;
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> index 9927cd6b7082..f5ae55f4f845 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> @@ -115,7 +115,9 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
>   	return result;
>   }
>   
> -int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> +int ipoib_vlan_add_common(struct net_device *pdev,
> +			  unsigned short pkey,
> +			  char *child_name_buf)
>   {
>   	struct ipoib_dev_priv *ppriv, *priv;
>   	char intf_name[IFNAMSIZ];
> @@ -130,8 +132,21 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
>   	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
>   		return -EPERM;
>   
> -	snprintf(intf_name, sizeof intf_name, "%s.%04x",
> -		 ppriv->dev->name, pkey);
> +	if (child_name_buf == NULL) {
> +		/*
> +		 * If child name is not provided, we generated
> +		 * one using name of parent and pkey.
> +		 */
> +		snprintf(intf_name, sizeof(intf_name), "%s.%04x",
> +			 ppriv->dev->name, pkey);
> +	} else {
> +		/*
> +		 * Note: Duplicate intf_name will be detected later in the code
> +		 * by register_netdevice() (inside __ipoib_vlan_add() call
> +		 * below) returning EEXIST!
> +		 */
> +		strncpy(intf_name, child_name_buf, IFNAMSIZ);
> +	}
>   
>   	if (!mutex_trylock(&ppriv->sysfs_mutex))
>   		return restart_syscall();
> @@ -183,10 +198,27 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
>   	return result;
>   }
>   
> -int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> +{
> +	return ipoib_vlan_add_common(pdev, pkey, NULL);
> +}
> +
> +int ipoib_named_vlan_add(struct net_device *pdev,
> +			 unsigned short pkey,
> +			 char *child_name_buf)
> +{
> +	return ipoib_vlan_add_common(pdev, pkey, child_name_buf);
> +}
> +
> +int ipoib_vlan_delete_common(struct net_device *pdev,
> +			     unsigned short pkey,
> +			     char *child_name_buf)
>   {
>   	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
>   	struct net_device *dev = NULL;
> +	char gen_intf_name[IFNAMSIZ];
> +
> +	gen_intf_name[0] = '\0'; /* initialize - paranoia! */
>   
>   	if (!capable(CAP_NET_ADMIN))
>   		return -EPERM;
> @@ -205,9 +237,30 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
>   	}
>   
>   	down_write(&ppriv->vlan_rwsem);
> +	if (child_name_buf == NULL && ppriv->dev) {
> +		/*
> +		 * If child name is not provided, we generate the
> +		 * expected one using name of parent and pkey
> +		 * and use it in addition to pkey value
> +		 * (other children with same pkey may exist that have
> +		 * created by create_named_child() - we do not allow
> +		 * delete_child() to delete them - delete_named_child()
> +		 * has to be used!)
> +		 */
> +		snprintf(gen_intf_name, sizeof(gen_intf_name),
> +			 "%s.%04x", ppriv->dev->name, pkey);
> +	}
>   	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
> -		if (priv->pkey == pkey &&
> -		    priv->child_type == IPOIB_LEGACY_CHILD) {
> +		if ((priv->child_type == IPOIB_LEGACY_CHILD) &&
> +		    /* user named child (match by name) OR */
> +		    ((child_name_buf && priv->dev &&
> +		      !strcmp(child_name_buf, priv->dev->name)) ||
> +		     /*
> +		      * OR classic (devname.hexpkey generated name) child
> +		      * (match by pkey and generated name)
> +		      */
> +		     (!child_name_buf && priv->pkey == pkey &&
> +		      priv->dev && !strcmp(gen_intf_name, priv->dev->name)))) {
>   			list_del(&priv->list);
>   			dev = priv->dev;
>   			break;
> @@ -231,3 +284,14 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
>   
>   	return -ENODEV;
>   }
> +
> +int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> +{
> +
> +	return ipoib_vlan_delete_common(pdev, pkey, NULL);
> +}
> +
> +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf)
> +{
> +	return ipoib_vlan_delete_common(pdev, 0, child_name_buf);
> +}
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]     ` <78f7983f-dc6b-a42a-0b29-adb69777fa75-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2017-09-27 17:26       ` Yuval Shaia
  0 siblings, 0 replies; 23+ messages in thread
From: Yuval Shaia @ 2017-09-27 17:26 UTC (permalink / raw)
  To: Mukesh Kacker
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	leon-DgEjT+Ai2ygdnm+yROfE0A, valex-VPRAkNaXOzVWk0Htik3J/w,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Wed, Sep 27, 2017 at 10:20:20AM -0700, Mukesh Kacker wrote:
> Please do retain original author name from UEK4 and that should be me! :-)

Oops,
Will be fixed for v1

> [ can be fixed by editing with "git commit --amend --author="<string>" ]
> 
> -Mukesh Kacker
> 
> 
> On 09/27/2017 02:32 AM, Yuval Shaia wrote:
> > The sysfs "create_child" interface creates pkey based child interface but
> > derives the name from parent device name and pkey value.
> > This makes administration difficult where pkey values can change but
> > policies encoded with device names do not.
> > 
> > We add ability to create a child interface with a user specified name and a
> > specified pkey with a new sysfs "create_named_child" interface (and also
> > add a corresponding "delete_named_child" interface).
> > 
> > We also add a new module api interface to query pkey from a netdevice so
> > any kernel users of pkey based child interfaces can query it - since with
> > device name decoupled from pkey, it can no longer be deduced from parsing
> > the device name by other kernel users.
> > 
> > Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > ---
> >   Documentation/infiniband/ipoib.txt        |  12 ++
> >   drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
> >   drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
> >   drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
> >   4 files changed, 272 insertions(+), 6 deletions(-)
> > 
> > diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
> > index 47c1dd9818f2..1db53c9b2906 100644
> > --- a/Documentation/infiniband/ipoib.txt
> > +++ b/Documentation/infiniband/ipoib.txt
> > @@ -21,6 +21,18 @@ Partitions and P_Keys
> >       echo 0x8001 > /sys/class/net/ib0/delete_child
> > +  Interfaces with a user chosen name can be created in a similar
> > +  manner with a different name and P_Key, by writing them into the
> > +  main interface's /sys/class/net/<intf name>/create_named_child
> > +  For example:
> > +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
> > +
> > +   This will create an interfaces named epart2 with P_Key 0x8002 and
> > +   parent ib1. To remove a named subinterface, use the
> > +   "delete_named_child" file:
> > +
> > +     echo epart2 > /sys/class/net/ib1/delete_named_child
> > +
> >     The P_Key for any interface is given by the "pkey" file, and the
> >     main interface for a subinterface is in "parent."
> > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> > index 4a5c7a07a631..9d0010f9b324 100644
> > --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> > @@ -589,6 +589,9 @@ void ipoib_event(struct ib_event_handler *handler,
> >   int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
> >   int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
> > +int ipoib_named_vlan_add(struct net_device *pdev, unsigned short pkey,
> > +			 char *child_name_buf);
> > +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf);
> >   int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
> >   		     u16 pkey, int child_type);
> > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> > index bac95b509a9b..2bdd4055d69f 100644
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> > @@ -34,6 +34,7 @@
> >   #include "ipoib.h"
> > +#include <linux/ctype.h>
> >   #include <linux/module.h>
> >   #include <linux/init.h>
> > @@ -136,6 +137,13 @@ static int ipoib_netdev_event(struct notifier_block *this,
> >   }
> >   #endif
> > +/*
> > + * PKEY_HEXSTRING_MAXWIDTH - number of hex
> > + *   digits needed to represent max width of
> > + *   pkey value.
> > + */
> > +#define PKEY_HEXSTRING_MAXWIDTH 4
> > +
> >   int ipoib_open(struct net_device *dev)
> >   {
> >   	struct ipoib_dev_priv *priv = ipoib_priv(dev);
> > @@ -2111,6 +2119,121 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
> >   	return 0;
> >   }
> > +/*
> > + * Check if a buffer has name of the format
> > + *
> > + * <network-device-name>.<4hexcharacters>
> > + * e.g. ib1.8004 etc.
> > + *
> > + * Such names are generated by create_child() by
> > + * concatenating parent device with 16-bit pkey
> > + * in hex, and disallowed from usage with
> > + * create_named_child() interface.
> > + *
> > + */
> > +static bool ipoib_disallowed_named_child_namespace(const char *buf)
> > +{
> > +	char localbuf[IFNAMSIZ];
> > +	char *dotp = NULL;
> > +	char *buf_before_dot = NULL;
> > +	char *buf_after_dot = NULL;
> > +	unsigned int ii;
> > +
> > +	memcpy(localbuf, buf, IFNAMSIZ);
> > +	localbuf[IFNAMSIZ-1] = '\0'; /* paranoia! */
> > +
> > +	dotp = strnchr(localbuf, IFNAMSIZ, '.');
> > +	/* no dot or dot at end! */
> > +	if (dotp == NULL || dotp == localbuf+IFNAMSIZ-2)
> > +		return false;
> > +
> > +	*dotp = '\0';		/* split buffer at "dot"  */
> > +	buf_before_dot = localbuf;
> > +	buf_after_dot = dotp + 1;
> > +
> > +	/*
> > +	 * Check if buf_after_dot is hexstring of width
> > +	 * that could be a pkey!
> > +	 */
> > +	if (strlen(buf_after_dot) != PKEY_HEXSTRING_MAXWIDTH)
> > +		return false;
> > +
> > +	for (ii = 0; ii < PKEY_HEXSTRING_MAXWIDTH; ii++) {
> > +		if (!isxdigit(buf_after_dot[ii]))
> > +			return false;
> > +	}
> > +
> > +	/*
> > +	 * (1) buf_after_dot check above makes it valid hexdigit .XXXX format
> > +	 *
> > +	 * Now verify if buf_before_dot is a valid net device name -
> > +	 * (if it is not, then we are not in disallowed namespace)
> > +	 */
> > +	if (__dev_get_by_name(&init_net, buf_before_dot) == NULL)
> > +		return false;
> > +
> > +	/*
> > +	 * (2) buf_before_dot is valid net device name
> > +	 *    - reserved namespace is being used!
> > +	 *
> > +	 * Note: No check on netdev->type to be ARPHRD_INFINIBAND etc
> > +	 *       We implicitly treat even misleading names such as eth1.XXXX
> > +	 *       (ethernet device prefix) for child interface name of an
> > +	 *       infiniband device as intrusion of reserved namespace!
> > +	 */
> > +	return true;
> > +}
> > +
> > +static int parse_named_child(struct device *dev, const char *buf,
> > +			     char *child_name_buf, int *pkeyp)
> > +{
> > +	int ret;
> > +	struct ipoib_dev_priv *priv = ipoib_priv(to_net_dev(dev));
> > +
> > +	if (pkeyp)
> > +		*pkeyp = -1;
> > +
> > +	/*
> > +	 * First parameter is child interface name, after that
> > +	 * 'pkey' is required if we were passed a pkey buffer
> > +	 * (Note: From create_named_child, we are passed a pkey
> > +	 * buffer to parse input, from delete_named_child we are
> > +	 * not!)
> > +	 * Note: IFNAMSIZ is 16, allowing for tail null
> > +	 * we only scan 15 characters for name.
> > +	 */
> > +	if (pkeyp) {
> > +		ret = sscanf(buf, "%15s %i", child_name_buf, pkeyp);
> > +		if (ret != 2)
> > +			return -EINVAL;
> > +	} else {
> > +		ret = sscanf(buf, "%15s", child_name_buf);
> > +		if (ret != 1)
> > +			return -EINVAL;
> > +	}
> > +
> > +	if (strlen(child_name_buf) <= 0 || !dev_valid_name(child_name_buf))
> > +		return -EINVAL;
> > +
> > +	if (pkeyp && (*pkeyp <= 0 || *pkeyp > 0xffff || *pkeyp == 0x8000))
> > +		return -EINVAL;
> > +
> > +	if (ipoib_disallowed_named_child_namespace(child_name_buf)) {
> > +		pr_warn("child name %s not allowed  to be used with create_named_child as it uses <network-device-name>.XXXX format reserved for create_child/delete_child interfaces!\n",
> > +			child_name_buf);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (pkeyp)
> > +		ipoib_dbg(priv, "%s inp %s out child_name_buf %s, pkey %04x\n",
> > +			  __func__, buf, child_name_buf, *pkeyp);
> > +	else
> > +		ipoib_dbg(priv, "%s inp %s out child_name_buf %s\n", __func__,
> > +			  buf, child_name_buf);
> > +	return 0;
> > +}
> > +
> > +
> >   static ssize_t create_child(struct device *dev,
> >   			    struct device_attribute *attr,
> >   			    const char *buf, size_t count)
> > @@ -2156,6 +2279,44 @@ static ssize_t delete_child(struct device *dev,
> >   }
> >   static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child);
> > +static ssize_t create_named_child(struct device *dev,
> > +				  struct device_attribute *attr,
> > +				  const char *buf, size_t count)
> > +{
> > +	int pkey;
> > +	char child_name[IFNAMSIZ];
> > +	int ret = 0;
> > +
> > +	child_name[0] = '\0';
> > +
> > +	if (parse_named_child(dev, buf, child_name, &pkey))
> > +		return -EINVAL;
> > +
> > +	ret = ipoib_named_vlan_add(to_net_dev(dev), pkey, child_name);
> > +	return ret ? ret : count;
> > +}
> > +static DEVICE_ATTR(create_named_child, S_IWUSR, NULL, create_named_child);
> > +
> > +static ssize_t delete_named_child(struct device *dev,
> > +				  struct device_attribute *attr,
> > +				  const char *buf, size_t count)
> > +{
> > +	char child_name[IFNAMSIZ];
> > +	int ret = 0;
> > +
> > +	child_name[0] = '\0';
> > +
> > +	if (parse_named_child(dev, buf, child_name, NULL))
> > +		return -EINVAL;
> > +
> > +	ret = ipoib_named_vlan_delete(to_net_dev(dev), child_name);
> > +
> > +	return ret ? ret : count;
> > +
> > +}
> > +static DEVICE_ATTR(delete_named_child, S_IWUSR, NULL, delete_named_child);
> > +
> > +
> >   int ipoib_add_pkey_attr(struct net_device *dev)
> >   {
> >   	return device_create_file(&dev->dev, &dev_attr_pkey);
> > @@ -2263,6 +2424,11 @@ static struct net_device *ipoib_add_port(const char *format,
> >   		goto sysfs_failed;
> >   	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
> >   		goto sysfs_failed;
> > +	if (device_create_file(&priv->dev->dev, &dev_attr_create_named_child))
> > +		goto sysfs_failed;
> > +	if (device_create_file(&priv->dev->dev, &dev_attr_delete_named_child))
> > +		goto sysfs_failed;
> > +
> >   	return priv->dev;
> > @@ -2367,6 +2533,27 @@ static struct notifier_block ipoib_netdev_notifier = {
> >   };
> >   #endif
> > +int
> > +ipoib_get_netdev_pkey(struct net_device *dev, u16 *pkey)
> > +{
> > +	struct ipoib_dev_priv *priv;
> > +
> > +	if (dev->type != ARPHRD_INFINIBAND)
> > +		return -EINVAL;
> > +
> > +	/* only for ipoib net devices! */
> > +	if ((dev->netdev_ops != &ipoib_netdev_ops_pf) &&
> > +	    (dev->netdev_ops != &ipoib_netdev_ops_vf))
> > +		return -EINVAL;
> > +
> > +	priv = ipoib_priv(dev);
> > +
> > +	*pkey = priv->pkey;
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(ipoib_get_netdev_pkey);
> > +
> >   static int __init ipoib_init_module(void)
> >   {
> >   	int ret;
> > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> > index 9927cd6b7082..f5ae55f4f845 100644
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
> > @@ -115,7 +115,9 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
> >   	return result;
> >   }
> > -int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> > +int ipoib_vlan_add_common(struct net_device *pdev,
> > +			  unsigned short pkey,
> > +			  char *child_name_buf)
> >   {
> >   	struct ipoib_dev_priv *ppriv, *priv;
> >   	char intf_name[IFNAMSIZ];
> > @@ -130,8 +132,21 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> >   	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
> >   		return -EPERM;
> > -	snprintf(intf_name, sizeof intf_name, "%s.%04x",
> > -		 ppriv->dev->name, pkey);
> > +	if (child_name_buf == NULL) {
> > +		/*
> > +		 * If child name is not provided, we generated
> > +		 * one using name of parent and pkey.
> > +		 */
> > +		snprintf(intf_name, sizeof(intf_name), "%s.%04x",
> > +			 ppriv->dev->name, pkey);
> > +	} else {
> > +		/*
> > +		 * Note: Duplicate intf_name will be detected later in the code
> > +		 * by register_netdevice() (inside __ipoib_vlan_add() call
> > +		 * below) returning EEXIST!
> > +		 */
> > +		strncpy(intf_name, child_name_buf, IFNAMSIZ);
> > +	}
> >   	if (!mutex_trylock(&ppriv->sysfs_mutex))
> >   		return restart_syscall();
> > @@ -183,10 +198,27 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> >   	return result;
> >   }
> > -int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> > +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
> > +{
> > +	return ipoib_vlan_add_common(pdev, pkey, NULL);
> > +}
> > +
> > +int ipoib_named_vlan_add(struct net_device *pdev,
> > +			 unsigned short pkey,
> > +			 char *child_name_buf)
> > +{
> > +	return ipoib_vlan_add_common(pdev, pkey, child_name_buf);
> > +}
> > +
> > +int ipoib_vlan_delete_common(struct net_device *pdev,
> > +			     unsigned short pkey,
> > +			     char *child_name_buf)
> >   {
> >   	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
> >   	struct net_device *dev = NULL;
> > +	char gen_intf_name[IFNAMSIZ];
> > +
> > +	gen_intf_name[0] = '\0'; /* initialize - paranoia! */
> >   	if (!capable(CAP_NET_ADMIN))
> >   		return -EPERM;
> > @@ -205,9 +237,30 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> >   	}
> >   	down_write(&ppriv->vlan_rwsem);
> > +	if (child_name_buf == NULL && ppriv->dev) {
> > +		/*
> > +		 * If child name is not provided, we generate the
> > +		 * expected one using name of parent and pkey
> > +		 * and use it in addition to pkey value
> > +		 * (other children with same pkey may exist that have
> > +		 * created by create_named_child() - we do not allow
> > +		 * delete_child() to delete them - delete_named_child()
> > +		 * has to be used!)
> > +		 */
> > +		snprintf(gen_intf_name, sizeof(gen_intf_name),
> > +			 "%s.%04x", ppriv->dev->name, pkey);
> > +	}
> >   	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
> > -		if (priv->pkey == pkey &&
> > -		    priv->child_type == IPOIB_LEGACY_CHILD) {
> > +		if ((priv->child_type == IPOIB_LEGACY_CHILD) &&
> > +		    /* user named child (match by name) OR */
> > +		    ((child_name_buf && priv->dev &&
> > +		      !strcmp(child_name_buf, priv->dev->name)) ||
> > +		     /*
> > +		      * OR classic (devname.hexpkey generated name) child
> > +		      * (match by pkey and generated name)
> > +		      */
> > +		     (!child_name_buf && priv->pkey == pkey &&
> > +		      priv->dev && !strcmp(gen_intf_name, priv->dev->name)))) {
> >   			list_del(&priv->list);
> >   			dev = priv->dev;
> >   			break;
> > @@ -231,3 +284,14 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> >   	return -ENODEV;
> >   }
> > +
> > +int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
> > +{
> > +
> > +	return ipoib_vlan_delete_common(pdev, pkey, NULL);
> > +}
> > +
> > +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf)
> > +{
> > +	return ipoib_vlan_delete_common(pdev, 0, child_name_buf);
> > +}
> > 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]     ` <20170927150140.GF2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-09-27 19:03       ` Mukesh Kacker
       [not found]         ` <c98fc5c0-cb34-c960-4bbf-34422bbacd0e-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Mukesh Kacker @ 2017-09-27 19:03 UTC (permalink / raw)
  To: Leon Romanovsky, Yuval Shaia
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On 09/27/2017 08:01 AM, Leon Romanovsky wrote:
> On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
>> The sysfs "create_child" interface creates pkey based child interface but
>> derives the name from parent device name and pkey value.
>> This makes administration difficult where pkey values can change but
>> policies encoded with device names do not.
>>
>> We add ability to create a child interface with a user specified name and a
>> specified pkey with a new sysfs "create_named_child" interface (and also
>> add a corresponding "delete_named_child" interface).
>>
>> We also add a new module api interface to query pkey from a netdevice so
>> any kernel users of pkey based child interfaces can query it - since with
>> device name decoupled from pkey, it can no longer be deduced from parsing
>> the device name by other kernel users.
>>
>> Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>>   Documentation/infiniband/ipoib.txt        |  12 ++
>>   drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
>>   drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
>>   drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
>>   4 files changed, 272 insertions(+), 6 deletions(-)
>>
>> diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
>> index 47c1dd9818f2..1db53c9b2906 100644
>> --- a/Documentation/infiniband/ipoib.txt
>> +++ b/Documentation/infiniband/ipoib.txt
>> @@ -21,6 +21,18 @@ Partitions and P_Keys
>>
>>       echo 0x8001 > /sys/class/net/ib0/delete_child
>>
>> +  Interfaces with a user chosen name can be created in a similar
>> +  manner with a different name and P_Key, by writing them into the
>> +  main interface's /sys/class/net/<intf name>/create_named_child
>> +  For example:
>> +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
>> +
>> +   This will create an interfaces named epart2 with P_Key 0x8002 and
>> +   parent ib1. To remove a named subinterface, use the
>> +   "delete_named_child" file:
>> +
>> +     echo epart2 > /sys/class/net/ib1/delete_named_child
> 
> I doubt that delete_named_child is actually needed. You can use delete_child
> on the pkey, which you used to create named child.
> 
> Maybe better to add support to rename child instead of introducing named
> child concept?
> 
> Thanks
> 


I can offer a slightly indirect answer to justify the current interface 
by providing the background behind the requirements for this change.

The requirement for this change had come from the desire for ease of 
writing management tools and facilitate "renumbering" of pkeys as IB 
network clouds are reconfigured.

The renumbering still requires the name-value pair (e.g. PKEY_ID=<n>) to 
be propagated to hosts configurations, but having the pkey embeded in 
device name was introducing complexity as various sysadmin scripts and 
other things need to pick it up.

Having devices with names like ib0.datanet, ib1.cellnet or any other 
ib<N>.<string> simplifies that life of people designing the management 
tools for networks and integrating them for the use case of renumbering 
of pkeys.

Probably many future redesigns are possible, but for this tweak of the 
existing sysfs "create_child" interface, a rename child may not be the 
best variant if it requires using device name with pkey values at any 
stage in the use case. Same for delete_named_child.

Also, some related trivia - which I would not use to justify this design 
but can explain why certain things were done.

In ancient kernels like 2.6.39 (still widely used by our customers :-) ) 
where this was implemented first, it was possible to create multiple 
child interfaces with same pkey value through variants, so a delete 
interface just using pkey would have been ambiguous (probably not true 
in current kernels!).

Another trivia: We also have an accompanying change diffs to the script 
usually installed as /etc/sysconfig/network-scripts/ifup-ib and part of 
startup scripts (usually in RHEL and related distributions) which uses 
"create_child" and was enhanced to allow both "create_child" and 
"create_named_child" - if these changes are accepted, those changes 
should also be presented to the appropriate upstream for those scripts.

-Mukesh Kacker
mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]         ` <c98fc5c0-cb34-c960-4bbf-34422bbacd0e-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2017-09-28 13:45           ` Leon Romanovsky
  2017-09-28 16:38           ` Jason Gunthorpe
  1 sibling, 0 replies; 23+ messages in thread
From: Leon Romanovsky @ 2017-09-28 13:45 UTC (permalink / raw)
  To: Mukesh Kacker
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 5277 bytes --]

On Wed, Sep 27, 2017 at 12:03:40PM -0700, Mukesh Kacker wrote:
> On 09/27/2017 08:01 AM, Leon Romanovsky wrote:
> > On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > > The sysfs "create_child" interface creates pkey based child interface but
> > > derives the name from parent device name and pkey value.
> > > This makes administration difficult where pkey values can change but
> > > policies encoded with device names do not.
> > >
> > > We add ability to create a child interface with a user specified name and a
> > > specified pkey with a new sysfs "create_named_child" interface (and also
> > > add a corresponding "delete_named_child" interface).
> > >
> > > We also add a new module api interface to query pkey from a netdevice so
> > > any kernel users of pkey based child interfaces can query it - since with
> > > device name decoupled from pkey, it can no longer be deduced from parsing
> > > the device name by other kernel users.
> > >
> > > Signed-off-by: Mukesh Kacker <mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > > Reviewed-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > > Reviewed-by: Chien-Hua Yen <chien.yen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > > Signed-off-by: Yuval Shaia <yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > > ---
> > >   Documentation/infiniband/ipoib.txt        |  12 ++
> > >   drivers/infiniband/ulp/ipoib/ipoib.h      |   3 +
> > >   drivers/infiniband/ulp/ipoib/ipoib_main.c | 187 ++++++++++++++++++++++++++++++
> > >   drivers/infiniband/ulp/ipoib/ipoib_vlan.c |  76 +++++++++++-
> > >   4 files changed, 272 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
> > > index 47c1dd9818f2..1db53c9b2906 100644
> > > --- a/Documentation/infiniband/ipoib.txt
> > > +++ b/Documentation/infiniband/ipoib.txt
> > > @@ -21,6 +21,18 @@ Partitions and P_Keys
> > >
> > >       echo 0x8001 > /sys/class/net/ib0/delete_child
> > >
> > > +  Interfaces with a user chosen name can be created in a similar
> > > +  manner with a different name and P_Key, by writing them into the
> > > +  main interface's /sys/class/net/<intf name>/create_named_child
> > > +  For example:
> > > +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
> > > +
> > > +   This will create an interfaces named epart2 with P_Key 0x8002 and
> > > +   parent ib1. To remove a named subinterface, use the
> > > +   "delete_named_child" file:
> > > +
> > > +     echo epart2 > /sys/class/net/ib1/delete_named_child
> >
> > I doubt that delete_named_child is actually needed. You can use delete_child
> > on the pkey, which you used to create named child.
> >
> > Maybe better to add support to rename child instead of introducing named
> > child concept?
> >
> > Thanks
> >
>
>
> I can offer a slightly indirect answer to justify the current interface by
> providing the background behind the requirements for this change.
>
> The requirement for this change had come from the desire for ease of writing
> management tools and facilitate "renumbering" of pkeys as IB network clouds
> are reconfigured.
>
> The renumbering still requires the name-value pair (e.g. PKEY_ID=<n>) to be
> propagated to hosts configurations, but having the pkey embeded in device
> name was introducing complexity as various sysadmin scripts and other things
> need to pick it up.
>
> Having devices with names like ib0.datanet, ib1.cellnet or any other
> ib<N>.<string> simplifies that life of people designing the management tools
> for networks and integrating them for the use case of renumbering of pkeys.
>
> Probably many future redesigns are possible, but for this tweak of the
> existing sysfs "create_child" interface, a rename child may not be the best
> variant if it requires using device name with pkey values at any stage in
> the use case. Same for delete_named_child.

I'm not the IPoIB expert, but I see ipoib_netlink.c which uses netdev
stable index and can be easily extended without addition of new sysfs
model to allow rename from ip tool. I'm aware of many management tools
which uses directly netlink interface to configure network devices.

Did you see it?

>
> Also, some related trivia - which I would not use to justify this design but
> can explain why certain things were done.
>
> In ancient kernels like 2.6.39 (still widely used by our customers :-) )
> where this was implemented first, it was possible to create multiple child
> interfaces with same pkey value through variants, so a delete interface just
> using pkey would have been ambiguous (probably not true in current
> kernels!).
>
> Another trivia: We also have an accompanying change diffs to the script
> usually installed as /etc/sysconfig/network-scripts/ifup-ib and part of
> startup scripts (usually in RHEL and related distributions) which uses
> "create_child" and was enhanced to allow both "create_child" and
> "create_named_child" - if these changes are accepted, those changes should
> also be presented to the appropriate upstream for those scripts.

Those "trivia" are not relevant for any modern distribution and looks
like specific to ancient RHELs.

>
> -Mukesh Kacker
> mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-09-27 17:20   ` Mukesh Kacker
@ 2017-09-28 16:34   ` Jason Gunthorpe
       [not found]     ` <20170928163406.GB17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  3 siblings, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-09-28 16:34 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	leon-DgEjT+Ai2ygdnm+yROfE0A, valex-VPRAkNaXOzVWk0Htik3J/w,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> The sysfs "create_child" interface creates pkey based child interface but
> derives the name from parent device name and pkey value.
> This makes administration difficult where pkey values can change but
> policies encoded with device names do not.
> 
> We add ability to create a child interface with a user specified name and a
> specified pkey with a new sysfs "create_named_child" interface (and also
> add a corresponding "delete_named_child" interface).
> 
> We also add a new module api interface to query pkey from a netdevice so
> any kernel users of pkey based child interfaces can query it - since with
> device name decoupled from pkey, it can no longer be deduced from parsing
> the device name by other kernel users.

This should all use netlink these days, not more sysfs files.

Leon? What do you think about using rdmatool to provide a command line
for creating ipoib children?

> +  Interfaces with a user chosen name can be created in a similar
> +  manner with a different name and P_Key, by writing them into the
> +  main interface's /sys/class/net/<intf name>/create_named_child
> +  For example:
> +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child

'multi-value' sysfs files like this are categorically banned by Greg.

Any kind of configuration in sysfs is really frowned on these days.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]         ` <c98fc5c0-cb34-c960-4bbf-34422bbacd0e-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
  2017-09-28 13:45           ` Leon Romanovsky
@ 2017-09-28 16:38           ` Jason Gunthorpe
  1 sibling, 0 replies; 23+ messages in thread
From: Jason Gunthorpe @ 2017-09-28 16:38 UTC (permalink / raw)
  To: Mukesh Kacker
  Cc: Leon Romanovsky, Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Wed, Sep 27, 2017 at 12:03:40PM -0700, Mukesh Kacker wrote:

> Having devices with names like ib0.datanet, ib1.cellnet or any other
> ib<N>.<string> simplifies that life of people designing the management tools
> for networks and integrating them for the use case of renumbering of pkeys.

You should already be able to rename ipoib devices via 'ip link set name' - why
didn't you use that to get your names?

Do you need atomicity for some reason??

> variant if it requires using device name with pkey values at any stage in
> the use case. Same for delete_named_child.

delete_named_child is ugly, it should be a netlink command, and it
should use a ifindex, not name.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]     ` <20170928163406.GB17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-09-28 16:47       ` Leon Romanovsky
       [not found]         ` <20170928164735.GC2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Leon Romanovsky @ 2017-09-28 16:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1804 bytes --]

On Thu, Sep 28, 2017 at 10:34:06AM -0600, Jason Gunthorpe wrote:
> On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > The sysfs "create_child" interface creates pkey based child interface but
> > derives the name from parent device name and pkey value.
> > This makes administration difficult where pkey values can change but
> > policies encoded with device names do not.
> >
> > We add ability to create a child interface with a user specified name and a
> > specified pkey with a new sysfs "create_named_child" interface (and also
> > add a corresponding "delete_named_child" interface).
> >
> > We also add a new module api interface to query pkey from a netdevice so
> > any kernel users of pkey based child interfaces can query it - since with
> > device name decoupled from pkey, it can no longer be deduced from parsing
> > the device name by other kernel users.
>
> This should all use netlink these days, not more sysfs files.
>
> Leon? What do you think about using rdmatool to provide a command line
> for creating ipoib children?

As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
implements it and it is supported in "ip".

And I think that it is more netdev than rdma, so IMHO, the "ip" is more
appropriate, but if someone decides to add "ipoib" object in rdmatool,
I won't stand against it.

>
> > +  Interfaces with a user chosen name can be created in a similar
> > +  manner with a different name and P_Key, by writing them into the
> > +  main interface's /sys/class/net/<intf name>/create_named_child
> > +  For example:
> > +     echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child
>
> 'multi-value' sysfs files like this are categorically banned by Greg.
>
> Any kind of configuration in sysfs is really frowned on these days.
>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]         ` <20170928164735.GC2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-09-28 16:53           ` Jason Gunthorpe
       [not found]             ` <20170928165305.GE17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-09-28 16:53 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Thu, Sep 28, 2017 at 07:47:35PM +0300, Leon Romanovsky wrote:
> On Thu, Sep 28, 2017 at 10:34:06AM -0600, Jason Gunthorpe wrote:
> > On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > > The sysfs "create_child" interface creates pkey based child interface but
> > > derives the name from parent device name and pkey value.
> > > This makes administration difficult where pkey values can change but
> > > policies encoded with device names do not.
> > >
> > > We add ability to create a child interface with a user specified name and a
> > > specified pkey with a new sysfs "create_named_child" interface (and also
> > > add a corresponding "delete_named_child" interface).
> > >
> > > We also add a new module api interface to query pkey from a netdevice so
> > > any kernel users of pkey based child interfaces can query it - since with
> > > device name decoupled from pkey, it can no longer be deduced from parsing
> > > the device name by other kernel users.
> >
> > This should all use netlink these days, not more sysfs files.
> >
> > Leon? What do you think about using rdmatool to provide a command line
> > for creating ipoib children?
> 
> As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> implements it and it is supported in "ip".

Oh right:

  ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]

So what is the point of this series?

NAK from me.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]             ` <20170928165305.GE17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-09-28 17:08               ` Leon Romanovsky
  2017-10-15  5:47               ` Yuval Shaia
  1 sibling, 0 replies; 23+ messages in thread
From: Leon Romanovsky @ 2017-09-28 17:08 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1590 bytes --]

On Thu, Sep 28, 2017 at 10:53:05AM -0600, Jason Gunthorpe wrote:
> On Thu, Sep 28, 2017 at 07:47:35PM +0300, Leon Romanovsky wrote:
> > On Thu, Sep 28, 2017 at 10:34:06AM -0600, Jason Gunthorpe wrote:
> > > On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > > > The sysfs "create_child" interface creates pkey based child interface but
> > > > derives the name from parent device name and pkey value.
> > > > This makes administration difficult where pkey values can change but
> > > > policies encoded with device names do not.
> > > >
> > > > We add ability to create a child interface with a user specified name and a
> > > > specified pkey with a new sysfs "create_named_child" interface (and also
> > > > add a corresponding "delete_named_child" interface).
> > > >
> > > > We also add a new module api interface to query pkey from a netdevice so
> > > > any kernel users of pkey based child interfaces can query it - since with
> > > > device name decoupled from pkey, it can no longer be deduced from parsing
> > > > the device name by other kernel users.
> > >
> > > This should all use netlink these days, not more sysfs files.
> > >
> > > Leon? What do you think about using rdmatool to provide a command line
> > > for creating ipoib children?
> >
> > As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> > implements it and it is supported in "ip".
>
> Oh right:
>
>   ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]
>
> So what is the point of this series?
>
> NAK from me.

I tried to be more polite than you :)

>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]             ` <20170928165305.GE17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2017-09-28 17:08               ` Leon Romanovsky
@ 2017-10-15  5:47               ` Yuval Shaia
  2017-10-15  6:27                 ` Leon Romanovsky
  2017-10-17  8:18                 ` Jason Gunthorpe
  1 sibling, 2 replies; 23+ messages in thread
From: Yuval Shaia @ 2017-10-15  5:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Leon Romanovsky, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Thu, Sep 28, 2017 at 10:53:05AM -0600, Jason Gunthorpe wrote:
> On Thu, Sep 28, 2017 at 07:47:35PM +0300, Leon Romanovsky wrote:
> > On Thu, Sep 28, 2017 at 10:34:06AM -0600, Jason Gunthorpe wrote:
> > > On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > > > The sysfs "create_child" interface creates pkey based child interface but
> > > > derives the name from parent device name and pkey value.
> > > > This makes administration difficult where pkey values can change but
> > > > policies encoded with device names do not.
> > > >
> > > > We add ability to create a child interface with a user specified name and a
> > > > specified pkey with a new sysfs "create_named_child" interface (and also
> > > > add a corresponding "delete_named_child" interface).
> > > >
> > > > We also add a new module api interface to query pkey from a netdevice so
> > > > any kernel users of pkey based child interfaces can query it - since with
> > > > device name decoupled from pkey, it can no longer be deduced from parsing
> > > > the device name by other kernel users.
> > >
> > > This should all use netlink these days, not more sysfs files.
> > >
> > > Leon? What do you think about using rdmatool to provide a command line
> > > for creating ipoib children?
> > 
> > As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> > implements it and it is supported in "ip".
> 
> Oh right:
> 
>   ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]

So with this interface we can entirely remove the sysfs interface to create
child, right?

> 
> So what is the point of this series?
> 
> NAK from me.
> 
> Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
  2017-10-15  5:47               ` Yuval Shaia
@ 2017-10-15  6:27                 ` Leon Romanovsky
  2017-10-17  8:18                 ` Jason Gunthorpe
  1 sibling, 0 replies; 23+ messages in thread
From: Leon Romanovsky @ 2017-10-15  6:27 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: Jason Gunthorpe, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1832 bytes --]

On Sun, Oct 15, 2017 at 08:47:46AM +0300, Yuval Shaia wrote:
> On Thu, Sep 28, 2017 at 10:53:05AM -0600, Jason Gunthorpe wrote:
> > On Thu, Sep 28, 2017 at 07:47:35PM +0300, Leon Romanovsky wrote:
> > > On Thu, Sep 28, 2017 at 10:34:06AM -0600, Jason Gunthorpe wrote:
> > > > On Wed, Sep 27, 2017 at 12:32:48PM +0300, Yuval Shaia wrote:
> > > > > The sysfs "create_child" interface creates pkey based child interface but
> > > > > derives the name from parent device name and pkey value.
> > > > > This makes administration difficult where pkey values can change but
> > > > > policies encoded with device names do not.
> > > > >
> > > > > We add ability to create a child interface with a user specified name and a
> > > > > specified pkey with a new sysfs "create_named_child" interface (and also
> > > > > add a corresponding "delete_named_child" interface).
> > > > >
> > > > > We also add a new module api interface to query pkey from a netdevice so
> > > > > any kernel users of pkey based child interfaces can query it - since with
> > > > > device name decoupled from pkey, it can no longer be deduced from parsing
> > > > > the device name by other kernel users.
> > > >
> > > > This should all use netlink these days, not more sysfs files.
> > > >
> > > > Leon? What do you think about using rdmatool to provide a command line
> > > > for creating ipoib children?
> > >
> > > As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> > > implements it and it is supported in "ip".
> >
> > Oh right:
> >
> >   ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]
>
> So with this interface we can entirely remove the sysfs interface to create
> child, right?

We can't :(, it will break user's scripts.

Thanks

>
> >
> > So what is the point of this series?
> >
> > NAK from me.
> >
> > Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
  2017-10-15  5:47               ` Yuval Shaia
  2017-10-15  6:27                 ` Leon Romanovsky
@ 2017-10-17  8:18                 ` Jason Gunthorpe
       [not found]                   ` <20171017081837.GA19107-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  1 sibling, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-10-17  8:18 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: Leon Romanovsky, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Sun, Oct 15, 2017 at 08:47:46AM +0300, Yuval Shaia wrote:

> > > As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> > > implements it and it is supported in "ip".
> > 
> > Oh right:
> > 
> >   ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]
> 
> So with this interface we can entirely remove the sysfs interface to create
> child, right?

Yes, we should add a deprecation one shot printk to the kernel for the
sysfs interface to encourage people to use ip

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                   ` <20171017081837.GA19107-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-10-17 10:21                     ` Leon Romanovsky
       [not found]                       ` <20171017102121.GM2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Leon Romanovsky @ 2017-10-17 10:21 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 890 bytes --]

On Tue, Oct 17, 2017 at 02:18:37AM -0600, Jason Gunthorpe wrote:
> On Sun, Oct 15, 2017 at 08:47:46AM +0300, Yuval Shaia wrote:
>
> > > > As far as I understand ipoib_netlink.c, ipoib_new_child_link() already
> > > > implements it and it is supported in "ip".
> > >
> > > Oh right:
> > >
> > >   ip link add DEVICE name NAME type ipoib [ pkey PKEY ] [mode MODE ]
> >
> > So with this interface we can entirely remove the sysfs interface to create
> > child, right?
>
> Yes, we should add a deprecation one shot printk to the kernel for the
> sysfs interface to encourage people to use ip

Please don't do that, it won't help for anyone, and especially for the people
who didn't hear about "ip" in 2017.

IPoIB netlink doesn't support enhanced IPoIB device because child
device in netlink code was not allocated with rdma_alloc_netdev call
as it was done for other flows.

Thanks

>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                       ` <20171017102121.GM2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-10-19  6:11                         ` Jason Gunthorpe
       [not found]                           ` <20171019061100.GB6555-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-10-19  6:11 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> > Yes, we should add a deprecation one shot printk to the kernel for the
> > sysfs interface to encourage people to use ip
> 
> Please don't do that, it won't help for anyone, and especially for the people
> who didn't hear about "ip" in 2017.

This is the standard kernel way to encourage people to use the new
interfaces...

> IPoIB netlink doesn't support enhanced IPoIB device because child
> device in netlink code was not allocated with rdma_alloc_netdev call
> as it was done for other flows.

I don't understand, isn't this just a (bad) bug to be fixed?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                           ` <20171019061100.GB6555-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-10-19  6:14                             ` Leon Romanovsky
       [not found]                               ` <20171019061405.GW2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Leon Romanovsky @ 2017-10-19  6:14 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	valex-VPRAkNaXOzVWk0Htik3J/w, erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 803 bytes --]

On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
> On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> > > Yes, we should add a deprecation one shot printk to the kernel for the
> > > sysfs interface to encourage people to use ip
> >
> > Please don't do that, it won't help for anyone, and especially for the people
> > who didn't hear about "ip" in 2017.
>
> This is the standard kernel way to encourage people to use the new
> interfaces...
>
> > IPoIB netlink doesn't support enhanced IPoIB device because child
> > device in netlink code was not allocated with rdma_alloc_netdev call
> > as it was done for other flows.
>
> I don't understand, isn't this just a (bad) bug to be fixed?

Yes and we want to fix it BEFORE adding discouraging warnings.

Thanks

>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                               ` <20171019061405.GW2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-10-19  6:28                                 ` Alex Vesker
       [not found]                                   ` <c2f3b529-cc53-aef0-fdcf-e86e99360db0-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Vesker @ 2017-10-19  6:28 UTC (permalink / raw)
  To: Leon Romanovsky, Jason Gunthorpe
  Cc: Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA



On 10/19/2017 9:14 AM, Leon Romanovsky wrote:
> On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
>> On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
>>>> Yes, we should add a deprecation one shot printk to the kernel for the
>>>> sysfs interface to encourage people to use ip
>>> Please don't do that, it won't help for anyone, and especially for the people
>>> who didn't hear about "ip" in 2017.
>> This is the standard kernel way to encourage people to use the new
>> interfaces...
>>
>>> IPoIB netlink doesn't support enhanced IPoIB device because child
>>> device in netlink code was not allocated with rdma_alloc_netdev call
>>> as it was done for other flows.
>> I don't understand, isn't this just a (bad) bug to be fixed?
> Yes and we want to fix it BEFORE adding discouraging warnings.
>
> Thanks
>
>> Jason
We plan to block ipoib_netlink in the meanwhile until we fix it.
The fix is a little tricky since the netdev is allocated in rtnl_newlink 
and it should be allocated
with the new logic introduced in enhanced IPoIB, setting rn_ops, 
allocating an rdma_netdev if possible.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                                   ` <c2f3b529-cc53-aef0-fdcf-e86e99360db0-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2017-10-21 19:42                                     ` Jason Gunthorpe
       [not found]                                       ` <20171021194215.GA4094-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-10-21 19:42 UTC (permalink / raw)
  To: Alex Vesker
  Cc: Leon Romanovsky, Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Thu, Oct 19, 2017 at 09:28:31AM +0300, Alex Vesker wrote:
> On 10/19/2017 9:14 AM, Leon Romanovsky wrote:
> >On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
> >>On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> >>>>Yes, we should add a deprecation one shot printk to the kernel for the
> >>>>sysfs interface to encourage people to use ip
> >>>Please don't do that, it won't help for anyone, and especially for the people
> >>>who didn't hear about "ip" in 2017.
> >>This is the standard kernel way to encourage people to use the new
> >>interfaces...
> >>
> >>>IPoIB netlink doesn't support enhanced IPoIB device because child
> >>>device in netlink code was not allocated with rdma_alloc_netdev call
> >>>as it was done for other flows.
> >>I don't understand, isn't this just a (bad) bug to be fixed?
> >Yes and we want to fix it BEFORE adding discouraging warnings.

> We plan to block ipoib_netlink in the meanwhile until we fix it.

That isn't a good idea, don't break APIs for performance reasons..

> The fix is a little tricky since the netdev is allocated in
> rtnl_newlink and it should be allocated with the new logic
> introduced in enhanced IPoIB, setting rn_ops, allocating an
> rdma_netdev if possible.

Should get on it then .. :)

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                                       ` <20171021194215.GA4094-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-10-23  6:04                                         ` Leon Romanovsky
       [not found]                                           ` <20171023060436.GD2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Leon Romanovsky @ 2017-10-23  6:04 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Vesker, Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1583 bytes --]

On Sat, Oct 21, 2017 at 01:42:15PM -0600, Jason Gunthorpe wrote:
> On Thu, Oct 19, 2017 at 09:28:31AM +0300, Alex Vesker wrote:
> > On 10/19/2017 9:14 AM, Leon Romanovsky wrote:
> > >On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
> > >>On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> > >>>>Yes, we should add a deprecation one shot printk to the kernel for the
> > >>>>sysfs interface to encourage people to use ip
> > >>>Please don't do that, it won't help for anyone, and especially for the people
> > >>>who didn't hear about "ip" in 2017.
> > >>This is the standard kernel way to encourage people to use the new
> > >>interfaces...
> > >>
> > >>>IPoIB netlink doesn't support enhanced IPoIB device because child
> > >>>device in netlink code was not allocated with rdma_alloc_netdev call
> > >>>as it was done for other flows.
> > >>I don't understand, isn't this just a (bad) bug to be fixed?
> > >Yes and we want to fix it BEFORE adding discouraging warnings.
>
> > We plan to block ipoib_netlink in the meanwhile until we fix it.
>
> That isn't a good idea, don't break APIs for performance reasons..

It is not "performance", but simple attempt to prevent kernel panic,
while calling that function, because I'm unsure if we success to provide
proper fix till merge window.

>
> > The fix is a little tricky since the netdev is allocated in
> > rtnl_newlink and it should be allocated with the new logic
> > introduced in enhanced IPoIB, setting rn_ops, allocating an
> > rdma_netdev if possible.
>
> Should get on it then .. :)
>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                                           ` <20171023060436.GD2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-10-23 15:23                                             ` Jason Gunthorpe
       [not found]                                               ` <20171023152340.GA11952-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Jason Gunthorpe @ 2017-10-23 15:23 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Alex Vesker, Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

On Mon, Oct 23, 2017 at 09:04:36AM +0300, Leon Romanovsky wrote:
> On Sat, Oct 21, 2017 at 01:42:15PM -0600, Jason Gunthorpe wrote:
> > On Thu, Oct 19, 2017 at 09:28:31AM +0300, Alex Vesker wrote:
> > > On 10/19/2017 9:14 AM, Leon Romanovsky wrote:
> > > >On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
> > > >>On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> > > >>>>Yes, we should add a deprecation one shot printk to the kernel for the
> > > >>>>sysfs interface to encourage people to use ip
> > > >>>Please don't do that, it won't help for anyone, and especially for the people
> > > >>>who didn't hear about "ip" in 2017.
> > > >>This is the standard kernel way to encourage people to use the new
> > > >>interfaces...
> > > >>
> > > >>>IPoIB netlink doesn't support enhanced IPoIB device because child
> > > >>>device in netlink code was not allocated with rdma_alloc_netdev call
> > > >>>as it was done for other flows.
> > > >>I don't understand, isn't this just a (bad) bug to be fixed?
> > > >Yes and we want to fix it BEFORE adding discouraging warnings.
> >
> > > We plan to block ipoib_netlink in the meanwhile until we fix it.
> >
> > That isn't a good idea, don't break APIs for performance reasons..
> 
> It is not "performance", but simple attempt to prevent kernel panic,
> while calling that function, because I'm unsure if we success to provide
> proper fix till merge window.

Er, why should a non-accelerated ipoib instance panic? That makes no
sense :(

In any event, someone needs to send patches to fix these oops's!

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] IB/ipoib: Enable pkey and device name decoupling
       [not found]                                               ` <20171023152340.GA11952-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-10-23 17:03                                                 ` Leon Romanovsky
  0 siblings, 0 replies; 23+ messages in thread
From: Leon Romanovsky @ 2017-10-23 17:03 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Alex Vesker, Yuval Shaia, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w,
	hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w, corbet-T1hC0tSOHrs,
	erezsh-VPRAkNaXOzVWk0Htik3J/w,
	dasaratharaman.chandramouli-ral2JQCrhuEAvxtiuMwx3w,
	yanjun.zhu-QHcLZuEGTsvQT0dZR+AlfA, pabeni-H+wXaHxf7aLQT0dZR+AlfA,
	kernel-6AxghH7DbtA, ferasda-VPRAkNaXOzVWk0Htik3J/w,
	shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA,
	mingo-DgEjT+Ai2ygdnm+yROfE0A, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	mukesh.kacker-QHcLZuEGTsvQT0dZR+AlfA,
	chien.yen-QHcLZuEGTsvQT0dZR+AlfA

[-- Attachment #1: Type: text/plain, Size: 1753 bytes --]

On Mon, Oct 23, 2017 at 09:23:40AM -0600, Jason Gunthorpe wrote:
> On Mon, Oct 23, 2017 at 09:04:36AM +0300, Leon Romanovsky wrote:
> > On Sat, Oct 21, 2017 at 01:42:15PM -0600, Jason Gunthorpe wrote:
> > > On Thu, Oct 19, 2017 at 09:28:31AM +0300, Alex Vesker wrote:
> > > > On 10/19/2017 9:14 AM, Leon Romanovsky wrote:
> > > > >On Thu, Oct 19, 2017 at 12:11:00AM -0600, Jason Gunthorpe wrote:
> > > > >>On Tue, Oct 17, 2017 at 01:21:21PM +0300, Leon Romanovsky wrote:
> > > > >>>>Yes, we should add a deprecation one shot printk to the kernel for the
> > > > >>>>sysfs interface to encourage people to use ip
> > > > >>>Please don't do that, it won't help for anyone, and especially for the people
> > > > >>>who didn't hear about "ip" in 2017.
> > > > >>This is the standard kernel way to encourage people to use the new
> > > > >>interfaces...
> > > > >>
> > > > >>>IPoIB netlink doesn't support enhanced IPoIB device because child
> > > > >>>device in netlink code was not allocated with rdma_alloc_netdev call
> > > > >>>as it was done for other flows.
> > > > >>I don't understand, isn't this just a (bad) bug to be fixed?
> > > > >Yes and we want to fix it BEFORE adding discouraging warnings.
> > >
> > > > We plan to block ipoib_netlink in the meanwhile until we fix it.
> > >
> > > That isn't a good idea, don't break APIs for performance reasons..
> >
> > It is not "performance", but simple attempt to prevent kernel panic,
> > while calling that function, because I'm unsure if we success to provide
> > proper fix till merge window.
>
> Er, why should a non-accelerated ipoib instance panic? That makes no
> sense :(
>
> In any event, someone needs to send patches to fix these oops's!

Agree, Alex is supposed to work on it.

>
> Jason

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2017-10-23 17:03 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-27  9:32 [PATCH] IB/ipoib: Enable pkey and device name decoupling Yuval Shaia
     [not found] ` <20170927093248.3819-1-yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-09-27 14:14   ` Doug Ledford
2017-09-27 15:01   ` Leon Romanovsky
     [not found]     ` <20170927150140.GF2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-09-27 19:03       ` Mukesh Kacker
     [not found]         ` <c98fc5c0-cb34-c960-4bbf-34422bbacd0e-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-09-28 13:45           ` Leon Romanovsky
2017-09-28 16:38           ` Jason Gunthorpe
2017-09-27 17:20   ` Mukesh Kacker
     [not found]     ` <78f7983f-dc6b-a42a-0b29-adb69777fa75-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-09-27 17:26       ` Yuval Shaia
2017-09-28 16:34   ` Jason Gunthorpe
     [not found]     ` <20170928163406.GB17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-09-28 16:47       ` Leon Romanovsky
     [not found]         ` <20170928164735.GC2297-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-09-28 16:53           ` Jason Gunthorpe
     [not found]             ` <20170928165305.GE17880-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-09-28 17:08               ` Leon Romanovsky
2017-10-15  5:47               ` Yuval Shaia
2017-10-15  6:27                 ` Leon Romanovsky
2017-10-17  8:18                 ` Jason Gunthorpe
     [not found]                   ` <20171017081837.GA19107-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-10-17 10:21                     ` Leon Romanovsky
     [not found]                       ` <20171017102121.GM2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-10-19  6:11                         ` Jason Gunthorpe
     [not found]                           ` <20171019061100.GB6555-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-10-19  6:14                             ` Leon Romanovsky
     [not found]                               ` <20171019061405.GW2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-10-19  6:28                                 ` Alex Vesker
     [not found]                                   ` <c2f3b529-cc53-aef0-fdcf-e86e99360db0-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-10-21 19:42                                     ` Jason Gunthorpe
     [not found]                                       ` <20171021194215.GA4094-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-10-23  6:04                                         ` Leon Romanovsky
     [not found]                                           ` <20171023060436.GD2106-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-10-23 15:23                                             ` Jason Gunthorpe
     [not found]                                               ` <20171023152340.GA11952-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-10-23 17:03                                                 ` Leon Romanovsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.