* [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
@ 2017-03-27 7:18 Shamir Rabinovitch
[not found] ` <1490599139-12665-1-git-send-email-shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-27 7:18 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA,
shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA
When udev renames the netdev devices, ipoib debugfs entries does not
get renamed. As a result, if subsequent probe of ipoib device reuse the
name then creating a debugfs entry for the new device would fail.
Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
of ipoib event handling in order to avoid any race condition between these.
Signed-off-by: Vijay Kumar <vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
v2->v3:
- Move rev change out of commit message
- Fix typo
v1->v2:
- Review comment from Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Verified again and NETDEV_UNREGISTER is called correctly. Debug
files are removed as expected. Thanks.
- Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
---
---
drivers/infiniband/ulp/ipoib/ipoib.h | 1 +
drivers/infiniband/ulp/ipoib/ipoib_fs.c | 28 +++++++++++++++++++
drivers/infiniband/ulp/ipoib/ipoib_main.c | 41 ++++++++++++++++++++++++++--
3 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717..95a457f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -770,6 +770,7 @@ static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *w
void ipoib_delete_debug_files(struct net_device *dev);
int ipoib_register_debugfs(void);
void ipoib_unregister_debugfs(void);
+void ipoib_debugfs_rename(struct net_device *dev);
#else
static inline void ipoib_create_debug_files(struct net_device *dev) { }
static inline void ipoib_delete_debug_files(struct net_device *dev) { }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740..d849b1d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -259,6 +259,34 @@ static int ipoib_path_open(struct inode *inode, struct file *file)
.release = seq_release
};
+void ipoib_debugfs_rename(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ char name[IFNAMSIZ + sizeof "_path"];
+
+ if (!ipoib_root)
+ return;
+
+ if (priv->mcg_dentry) {
+ snprintf(name, sizeof(name), "%s_mcg", dev->name);
+ priv->mcg_dentry = debugfs_rename(ipoib_root, priv->mcg_dentry,
+ ipoib_root, name);
+ if (!priv->mcg_dentry)
+ ipoib_warn(priv, "failed to rename debug file %s to %s\n",
+ priv->mcg_dentry->d_iname, name);
+ }
+
+ if (priv->path_dentry) {
+ snprintf(name, sizeof(name), "%s_path", dev->name);
+ priv->path_dentry = debugfs_rename(ipoib_root,
+ priv->path_dentry,
+ ipoib_root, name);
+ if (!priv->path_dentry)
+ ipoib_warn(priv, "failed to rename debug file %s to %s\n",
+ priv->mcg_dentry->d_iname, name);
+ }
+}
+
void ipoib_create_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b58d9dc..c84b8ee 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,6 +108,32 @@ struct ipoib_path_iter {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_debugfs_rename(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
int ipoib_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -2072,8 +2098,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2088,7 +2112,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2173,6 +2196,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2225,6 +2254,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2242,6 +2274,9 @@ static int __init ipoib_init_module(void)
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <1490599139-12665-1-git-send-email-shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2017-03-27 15:06 ` Mark Bloch
[not found] ` <4058624b-a947-9635-76ca-482fd6a6bd95-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-03-27 19:55 ` Leon Romanovsky
1 sibling, 1 reply; 13+ messages in thread
From: Mark Bloch @ 2017-03-27 15:06 UTC (permalink / raw)
To: Shamir Rabinovitch, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
Hi Shamir,
Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
We are calling in unconditionally in: ipoib_dev_cleanup
and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.
For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
So I have two ipoib interfaces (ib0, ib1)
When I load and unload mlx5_ib (while ib_ipoib is loaded:
root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
^C
FUNC COUNT
ipoib_create_debug_files 2
ipoib_delete_debug_files 4
Detaching...
Why not just remove the call in ipoib_dev_cleanup?
On 27/03/2017 10:18, Shamir Rabinovitch wrote:
> When udev renames the netdev devices, ipoib debugfs entries does not
> get renamed. As a result, if subsequent probe of ipoib device reuse
> the name then creating a debugfs entry for the new device would
> fail.
>
> Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as
> part of ipoib event handling in order to avoid any race condition
> between these.
>
> Signed-off-by: Vijay Kumar <vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> Signed-off-by:
> Shamir Rabinovitch <shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>
> --- v2->v3: - Move rev change out of commit message - Fix typo
> v1->v2: - Review comment from Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Verified again and NETDEV_UNREGISTER is called correctly. Debug files
> are removed as expected. Thanks. - Fix compile warning when
> CONFIG_INFINIBAND_IPOIB_DEBUG is not set ---
>
> --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 +
> drivers/infiniband/ulp/ipoib/ipoib_fs.c | 28 +++++++++++++++++++
> drivers/infiniband/ulp/ipoib/ipoib_main.c | 41
> ++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 3
> deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h
> b/drivers/infiniband/ulp/ipoib/ipoib.h index da12717..95a457f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++
> b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -770,6 +770,7 @@ static
> inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct
> ib_wc *w void ipoib_delete_debug_files(struct net_device *dev); int
> ipoib_register_debugfs(void); void ipoib_unregister_debugfs(void);
> +void ipoib_debugfs_rename(struct net_device *dev); #else static
> inline void ipoib_create_debug_files(struct net_device *dev) { }
> static inline void ipoib_delete_debug_files(struct net_device *dev) {
> } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 6bd5740..d849b1d
> 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++
> b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -259,6 +259,34 @@ static
> int ipoib_path_open(struct inode *inode, struct file *file) .release
> = seq_release };
>
> +void ipoib_debugfs_rename(struct net_device *dev) +{ + struct
> ipoib_dev_priv *priv = netdev_priv(dev); + char name[IFNAMSIZ +
> sizeof "_path"]; + + if (!ipoib_root) + return; + + if
> (priv->mcg_dentry) { + snprintf(name, sizeof(name), "%s_mcg",
> dev->name); + priv->mcg_dentry = debugfs_rename(ipoib_root,
> priv->mcg_dentry, + ipoib_root, name); + if
> (!priv->mcg_dentry) + ipoib_warn(priv, "failed to rename debug file
> %s to %s\n", + priv->mcg_dentry->d_iname, name); + } + + if
> (priv->path_dentry) { + snprintf(name, sizeof(name), "%s_path",
> dev->name); + priv->path_dentry = debugfs_rename(ipoib_root, +
> priv->path_dentry, + ipoib_root, name); + if
> (!priv->path_dentry) + ipoib_warn(priv, "failed to rename debug
> file %s to %s\n", + priv->mcg_dentry->d_iname, name); + } +} +
> void ipoib_create_debug_files(struct net_device *dev) { struct
> ipoib_dev_priv *priv = netdev_priv(dev); diff --git
> a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b58d9dc..c84b8ee
> 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++
> b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,6 +108,32 @@
> struct ipoib_path_iter { .get_net_dev_by_params =
> ipoib_get_net_dev_by_params, };
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static int
> ipoib_netdev_event(struct notifier_block *this, + unsigned
> long event, void *ptr) +{ + struct netdev_notifier_info *ni = ptr; +
> struct net_device *dev = ni->dev; + + if (dev->netdev_ops->ndo_open
> != ipoib_open) + return NOTIFY_DONE; + + switch (event) { + case
> NETDEV_REGISTER: + ipoib_create_debug_files(dev); + break; + case
> NETDEV_CHANGENAME: + ipoib_debugfs_rename(dev); + break; + case
> NETDEV_UNREGISTER: + ipoib_delete_debug_files(dev); + break; + } +
> + return NOTIFY_DONE; +} +#endif + int ipoib_open(struct net_device
> *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -2072,8
> +2098,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv,
> struct ib_device *hca) goto register_failed; }
>
> - ipoib_create_debug_files(priv->dev); - if
> (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if
> (ipoib_add_pkey_attr(priv->dev)) @@ -2088,7 +2112,6 @@ int
> ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device
> *hca) return priv->dev;
>
> sysfs_failed: - ipoib_delete_debug_files(priv->dev);
> unregister_netdev(priv->dev);
>
> register_failed: @@ -2173,6 +2196,12 @@ static void
> ipoib_remove_one(struct ib_device *device, void *client_data)
> kfree(dev_list); }
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static struct notifier_block
> ipoib_netdev_notifier = { + .notifier_call = ipoib_netdev_event, +};
> +#endif + static int __init ipoib_init_module(void) { int ret; @@
> -2225,6 +2254,9 @@ static int __init ipoib_init_module(void) if
> (ret) goto err_client;
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +
> register_netdevice_notifier(&ipoib_netdev_notifier); +#endif return
> 0;
>
> err_client: @@ -2242,6 +2274,9 @@ static int __init
> ipoib_init_module(void)
>
> static void __exit ipoib_cleanup_module(void) { +#ifdef
> CONFIG_INFINIBAND_IPOIB_DEBUG +
> unregister_netdevice_notifier(&ipoib_netdev_notifier); +#endif
> ipoib_netlink_fini(); ib_unregister_client(&ipoib_client);
> ib_sa_unregister_client(&ipoib_sa_client);
>
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <1490599139-12665-1-git-send-email-shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-03-27 15:06 ` Mark Bloch
@ 2017-03-27 19:55 ` Leon Romanovsky
[not found] ` <20170327195500.GH20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
1 sibling, 1 reply; 13+ messages in thread
From: Leon Romanovsky @ 2017-03-27 19:55 UTC (permalink / raw)
To: Shamir Rabinovitch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
[-- Attachment #1: Type: text/plain, Size: 6180 bytes --]
On Mon, Mar 27, 2017 at 03:18:59AM -0400, Shamir Rabinovitch wrote:
> When udev renames the netdev devices, ipoib debugfs entries does not
> get renamed. As a result, if subsequent probe of ipoib device reuse the
> name then creating a debugfs entry for the new device would fail.
>
> Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
> of ipoib event handling in order to avoid any race condition between these.
>
> Signed-off-by: Vijay Kumar <vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>
> ---
> v2->v3:
> - Move rev change out of commit message
> - Fix typo
> v1->v2:
> - Review comment from Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Verified again and NETDEV_UNREGISTER is called correctly. Debug
> files are removed as expected. Thanks.
> - Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
> ---
>
> ---
> drivers/infiniband/ulp/ipoib/ipoib.h | 1 +
> drivers/infiniband/ulp/ipoib/ipoib_fs.c | 28 +++++++++++++++++++
> drivers/infiniband/ulp/ipoib/ipoib_main.c | 41 ++++++++++++++++++++++++++--
> 3 files changed, 67 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index da12717..95a457f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -770,6 +770,7 @@ static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *w
> void ipoib_delete_debug_files(struct net_device *dev);
> int ipoib_register_debugfs(void);
> void ipoib_unregister_debugfs(void);
> +void ipoib_debugfs_rename(struct net_device *dev);
> #else
> static inline void ipoib_create_debug_files(struct net_device *dev) { }
> static inline void ipoib_delete_debug_files(struct net_device *dev) { }
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> index 6bd5740..d849b1d 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> @@ -259,6 +259,34 @@ static int ipoib_path_open(struct inode *inode, struct file *file)
> .release = seq_release
> };
>
> +void ipoib_debugfs_rename(struct net_device *dev)
> +{
> + struct ipoib_dev_priv *priv = netdev_priv(dev);
> + char name[IFNAMSIZ + sizeof "_path"];
> +
> + if (!ipoib_root)
> + return;
> +
> + if (priv->mcg_dentry) {
> + snprintf(name, sizeof(name), "%s_mcg", dev->name);
> + priv->mcg_dentry = debugfs_rename(ipoib_root, priv->mcg_dentry,
> + ipoib_root, name);
> + if (!priv->mcg_dentry)
> + ipoib_warn(priv, "failed to rename debug file %s to %s\n",
> + priv->mcg_dentry->d_iname, name);
> + }
> +
> + if (priv->path_dentry) {
> + snprintf(name, sizeof(name), "%s_path", dev->name);
> + priv->path_dentry = debugfs_rename(ipoib_root,
> + priv->path_dentry,
> + ipoib_root, name);
> + if (!priv->path_dentry)
> + ipoib_warn(priv, "failed to rename debug file %s to %s\n",
> + priv->mcg_dentry->d_iname, name);
> + }
> +}
> +
> void ipoib_create_debug_files(struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index b58d9dc..c84b8ee 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -108,6 +108,32 @@ struct ipoib_path_iter {
> .get_net_dev_by_params = ipoib_get_net_dev_by_params,
> };
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +static int ipoib_netdev_event(struct notifier_block *this,
> + unsigned long event, void *ptr)
> +{
> + struct netdev_notifier_info *ni = ptr;
> + struct net_device *dev = ni->dev;
> +
> + if (dev->netdev_ops->ndo_open != ipoib_open)
> + return NOTIFY_DONE;
> +
> + switch (event) {
> + case NETDEV_REGISTER:
> + ipoib_create_debug_files(dev);
> + break;
> + case NETDEV_CHANGENAME:
> + ipoib_debugfs_rename(dev);
Why do we need explicit ipoib_debugfs_rename function?
Will it work by simply calling ipoib_delete_debug_files
and immediately after that ipoib_create_debug_files?
> + break;
> + case NETDEV_UNREGISTER:
> + ipoib_delete_debug_files(dev);
> + break;
> + }
> +
> + return NOTIFY_DONE;
> +}
> +#endif
> +
> int ipoib_open(struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -2072,8 +2098,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
> goto register_failed;
> }
>
> - ipoib_create_debug_files(priv->dev);
> -
> if (ipoib_cm_add_mode_attr(priv->dev))
> goto sysfs_failed;
> if (ipoib_add_pkey_attr(priv->dev))
> @@ -2088,7 +2112,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
> return priv->dev;
>
> sysfs_failed:
> - ipoib_delete_debug_files(priv->dev);
> unregister_netdev(priv->dev);
>
> register_failed:
> @@ -2173,6 +2196,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
> kfree(dev_list);
> }
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +static struct notifier_block ipoib_netdev_notifier = {
> + .notifier_call = ipoib_netdev_event,
> +};
> +#endif
> +
> static int __init ipoib_init_module(void)
> {
> int ret;
> @@ -2225,6 +2254,9 @@ static int __init ipoib_init_module(void)
> if (ret)
> goto err_client;
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> + register_netdevice_notifier(&ipoib_netdev_notifier);
> +#endif
> return 0;
>
> err_client:
> @@ -2242,6 +2274,9 @@ static int __init ipoib_init_module(void)
>
> static void __exit ipoib_cleanup_module(void)
> {
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> + unregister_netdevice_notifier(&ipoib_netdev_notifier);
> +#endif
> ipoib_netlink_fini();
> ib_unregister_client(&ipoib_client);
> ib_sa_unregister_client(&ipoib_sa_client);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <4058624b-a947-9635-76ca-482fd6a6bd95-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2017-03-27 20:11 ` Shamir Rabinovitch
[not found] ` <20170327201156.GA29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-27 20:11 UTC (permalink / raw)
To: Mark Bloch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA,
shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA
[-- Attachment #1: Type: text/plain, Size: 1692 bytes --]
On Mon, Mar 27, 2017 at 06:06:42PM +0300, Mark Bloch wrote:
> Hi Shamir,
>
> Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
> We are calling in unconditionally in: ipoib_dev_cleanup
> and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.
>
> For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
> So I have two ipoib interfaces (ib0, ib1)
>
> When I load and unload mlx5_ib (while ib_ipoib is loaded:
>
> root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
> Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
> ^C
> FUNC COUNT
> ipoib_create_debug_files 2
> ipoib_delete_debug_files 4
> Detaching...
>
> Why not just remove the call in ipoib_dev_cleanup?
>
Hi Mark,
v3 of this patch works fine on system that has CX3 with 2 ports and the
below udev rules:
# InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"
On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
chaos in the ipoib device names.
The attached logs include the information collected when the openibd
service was started and when it was stopped. You can have a look in the
files and see what are the network events and how they are processed by
the ipoib devices.
I think it will answer your concerns.
BR, Shamir
[-- Attachment #2: openibd.start --]
[-- Type: text/plain, Size: 4495 bytes --]
mlx4_core: unknown parameter 'module_unload_allowed' ignored
mlx4_core: Mellanox ConnectX core driver v2.2-1 (Feb, 2014)
mlx4_core: Initializing 0002:01:00.0
PCI: Enabling device: (0002:01:00.0), cmd 2
mlx4_core 0002:01:00.0: Old device ETS support detected
mlx4_core 0002:01:00.0: Consider upgrading device FW.
mlx4_core 0002:01:00.0: PCIe link speed is 8.0GT/s, device supports 8.0GT/s
mlx4_core 0002:01:00.0: PCIe link width is x8, device supports x8
mlx4_core: Initializing 0006:01:00.0
PCI: Enabling device: (0006:01:00.0), cmd 2
mlx4_core 0006:01:00.0: Old device ETS support detected
mlx4_core 0006:01:00.0: Consider upgrading device FW.
mlx4_core 0006:01:00.0: PCIe link speed is 8.0GT/s, device supports 8.0GT/s
mlx4_core 0006:01:00.0: PCIe link width is x8, device supports x8
<mlx4_ib> mlx4_ib_add: mlx4_ib: Mellanox ConnectX InfiniBand driver v2.2-1 (Feb 2014)
<mlx4_ib> mlx4_ib_add: counter index 0 for port 1 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 1 for port 2 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 0 for port 1 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 1 for port 2 allocated 0
ib_ipoib: unknown parameter 'module_unload_allowed' ignored
ipoib_netdev_event: dev fff8001f59984000 name ib0 event 0x5
ipoib_netdev_event: dev fff8001f568b4000 name ib1 event 0x5
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x5
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x5
mlx4_core 0002:01:00.0 rename57: renamed from ib1
ipoib_netdev_event: dev fff8001f568b4000 name rename57 event 0xa
mlx4_core 0002:01:00.0 rename56: renamed from ib0
ipoib_netdev_event: dev fff8001f59984000 name rename56 event 0xa
mlx4_core 0002:01:00.0 ib0: renamed from rename57
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0xa
mlx4_core 0002:01:00.0 ib1: renamed from rename56
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0xa
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x17
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x7
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x17
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x7
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib2: link is not ready
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x1
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib3: link is not ready
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x1
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x17
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x7
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x17
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x7
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib1: link is not ready
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x1
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib0: link is not ready
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x1
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
IPv6: ADDRCONF(NETDEV_CHANGE): ib3: link becomes ready
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
IPv6: ADDRCONF(NETDEV_CHANGE): ib1: link becomes ready
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
IPv6: ADDRCONF(NETDEV_CHANGE): ib0: link becomes ready
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
IPv6: ADDRCONF(NETDEV_CHANGE): ib2: link becomes ready
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
[-- Attachment #3: openibd.stop --]
[-- Type: text/plain, Size: 720 bytes --]
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x9
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x2
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x9
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x2
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x9
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x2
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x9
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x2
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x6
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x6
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x6
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x6
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170327195500.GH20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-03-27 20:17 ` Shamir Rabinovitch
[not found] ` <20170327201714.GB29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-27 20:17 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On Mon, Mar 27, 2017 at 10:55:00PM +0300, Leon Romanovsky wrote:
> On Mon, Mar 27, 2017 at 03:18:59AM -0400, Shamir Rabinovitch wrote:
> > When udev renames the netdev devices, ipoib debugfs entries does not
> > get renamed. As a result, if subsequent probe of ipoib device reuse the
> > name then creating a debugfs entry for the new device would fail.
> >
> > Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
> > of ipoib event handling in order to avoid any race condition between these.
> >
> > Signed-off-by: Vijay Kumar <vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> >
> > ---
> > v2->v3:
> > - Move rev change out of commit message
> > - Fix typo
> > v1->v2:
> > - Review comment from Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > Verified again and NETDEV_UNREGISTER is called correctly. Debug
> > files are removed as expected. Thanks.
> > - Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
> > ---
> >
[...]
> >
> > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > +static int ipoib_netdev_event(struct notifier_block *this,
> > + unsigned long event, void *ptr)
> > +{
> > + struct netdev_notifier_info *ni = ptr;
> > + struct net_device *dev = ni->dev;
> > +
> > + if (dev->netdev_ops->ndo_open != ipoib_open)
> > + return NOTIFY_DONE;
> > +
> > + switch (event) {
> > + case NETDEV_REGISTER:
> > + ipoib_create_debug_files(dev);
> > + break;
> > + case NETDEV_CHANGENAME:
> > + ipoib_debugfs_rename(dev);
>
> Why do we need explicit ipoib_debugfs_rename function?
> Will it work by simply calling ipoib_delete_debug_files
> and immediately after that ipoib_create_debug_files?
>
>
> > + break;
> > + case NETDEV_UNREGISTER:
> > + ipoib_delete_debug_files(dev);
> > + break;
> > + }
> > +
> > + return NOTIFY_DONE;
> > +}
> > +#endif
> > +
Hi Leon,
Good point.
I will have look on this idea and get back to you.
BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170327201714.GB29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
@ 2017-03-28 9:19 ` Shamir Rabinovitch
[not found] ` <20170328091940.GA14058-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-28 9:19 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On Mon, Mar 27, 2017 at 11:17:14PM +0300, Shamir Rabinovitch wrote:
>
> > >
> > > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > > +static int ipoib_netdev_event(struct notifier_block *this,
> > > + unsigned long event, void *ptr)
> > > +{
> > > + struct netdev_notifier_info *ni = ptr;
> > > + struct net_device *dev = ni->dev;
> > > +
> > > + if (dev->netdev_ops->ndo_open != ipoib_open)
> > > + return NOTIFY_DONE;
> > > +
> > > + switch (event) {
> > > + case NETDEV_REGISTER:
> > > + ipoib_create_debug_files(dev);
> > > + break;
> > > + case NETDEV_CHANGENAME:
> > > + ipoib_debugfs_rename(dev);
> >
> > Why do we need explicit ipoib_debugfs_rename function?
> > Will it work by simply calling ipoib_delete_debug_files
> > and immediately after that ipoib_create_debug_files?
> >
> >
> > > + break;
> > > + case NETDEV_UNREGISTER:
> > > + ipoib_delete_debug_files(dev);
> > > + break;
> > > + }
> > > +
> > > + return NOTIFY_DONE;
> > > +}
> > > +#endif
> > > +
>
> Hi Leon,
>
> Good point.
> I will have look on this idea and get back to you.
>
> BR, Shamir
Hi Leon,
It seems that the difference between using debugfs_rename and a
combination of debugfs_create_file and debugfs_remove is mainly the
atomicy of the rename operation with regard to the file system.
debugfs_rename is atomic operation while the above combination is not.
As result I see kernel panic when the rename operation interleave with
the delete due to module unload. So after carefully considering what you
suggest I think it might introduce unexpected issues.
BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170327201156.GA29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
@ 2017-03-28 15:45 ` Mark Bloch
[not found] ` <a141c94a-63b7-491c-ce42-9b8cb08aeb93-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Mark Bloch @ 2017-03-28 15:45 UTC (permalink / raw)
To: Shamir Rabinovitch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
Hi Shamir,
On 27/03/2017 23:11, Shamir Rabinovitch wrote:
> On Mon, Mar 27, 2017 at 06:06:42PM +0300, Mark Bloch wrote:
>> Hi Shamir,
>>
>> Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
>> We are calling in unconditionally in: ipoib_dev_cleanup
>> and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.
>>
>> For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
>> So I have two ipoib interfaces (ib0, ib1)
>>
>> When I load and unload mlx5_ib (while ib_ipoib is loaded:
>>
>> root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
>> Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
>> ^C
>> FUNC COUNT
>> ipoib_create_debug_files 2
>> ipoib_delete_debug_files 4
>> Detaching...
>>
>> Why not just remove the call in ipoib_dev_cleanup?
>>
>
> Hi Mark,
>
> v3 of this patch works fine on system that has CX3 with 2 ports and the
> below udev rules:
>
> # InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"
>
> On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
> chaos in the ipoib device names.
>
> The attached logs include the information collected when the openibd
> service was started and when it was stopped. You can have a look in the
> files and see what are the network events and how they are processed by
> the ipoib devices.
>
> I think it will answer your concerns.
>
> BR, Shamir
>
I'm not saying it doesn't work, I'm saying works != works correctly.
We are calling ipoib_delete_debug_file too many times, it works by luck/chance.
While testing the patch, I've encountered another issue, running:
modprobe ib_ipoib
echo "0x0043" > /sys/class/net/ib0/create_child
modprobe -r ib_ipoib
and then looking the at the debugfs dir:
[root@dev-r-vrt-175 ~]# ls /sys/kernel/debug/ipoib/
ib0.8043_mcg ib0.8043_pat1
As you can see the the debugfs entries for the ib0 child weren't removed.
Also notice that after that, I can't load ib_ipoib
[root@dev-r-vrt-175 ~]# modprobe ib_ipoib
modprobe: ERROR: could not insert 'ib_ipoib': Cannot allocate memory
The more interesting issue is, dmesg output has this:
[ 467.185609] ib0.8043: failed to create mcg debug file
[ 467.192551] ib0.8043: failed to create path debug file
so maybe this is a debugfs bug?
Sorry I can't look into it, I have some internal stuff I need to work on :/
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <a141c94a-63b7-491c-ce42-9b8cb08aeb93-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2017-03-28 16:02 ` Shamir Rabinovitch
[not found] ` <20170328160251.GA26781-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-28 16:02 UTC (permalink / raw)
To: Mark Bloch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On Tue, Mar 28, 2017 at 06:45:44PM +0300, Mark Bloch wrote:
> >
> > Hi Mark,
> >
> > v3 of this patch works fine on system that has CX3 with 2 ports and the
> > below udev rules:
> >
> > # InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
> > SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> > ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
> > SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> > ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"
> >
> > On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
> > chaos in the ipoib device names.
> >
> > The attached logs include the information collected when the openibd
> > service was started and when it was stopped. You can have a look in the
> > files and see what are the network events and how they are processed by
> > the ipoib devices.
> >
> > I think it will answer your concerns.
> >
> > BR, Shamir
> >
>
> I'm not saying it doesn't work, I'm saying works != works correctly.
> We are calling ipoib_delete_debug_file too many times, it works by luck/chance.
>
> While testing the patch, I've encountered another issue, running:
>
> modprobe ib_ipoib
> echo "0x0043" > /sys/class/net/ib0/create_child
> modprobe -r ib_ipoib
>
> and then looking the at the debugfs dir:
> [root@dev-r-vrt-175 ~]# ls /sys/kernel/debug/ipoib/
> ib0.8043_mcg ib0.8043_pat1
>
> As you can see the the debugfs entries for the ib0 child weren't removed.
> Also notice that after that, I can't load ib_ipoib
> [root@dev-r-vrt-175 ~]# modprobe ib_ipoib
> modprobe: ERROR: could not insert 'ib_ipoib': Cannot allocate memory
>
> The more interesting issue is, dmesg output has this:
> [ 467.185609] ib0.8043: failed to create mcg debug file
> [ 467.192551] ib0.8043: failed to create path debug file
>
> so maybe this is a debugfs bug?
>
> Sorry I can't look into it, I have some internal stuff I need to work on :/
>
> Mark.
>
Hi Mark,
I am confused. Have you used v3 of the patch? If yes please add this
print after you apply the patch and send me the output when you stop the
openibd service:
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c84b8ee..a2f43ff 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -118,12 +118,17 @@ static int ipoib_netdev_event(struct
notifier_block *this,
if (dev->netdev_ops->ndo_open != ipoib_open)
return NOTIFY_DONE;
+ pr_err("%s: dev %p name %s event 0x%lx\n",
+ __func__, dev, dev->name, event);
+
switch (event) {
case NETDEV_REGISTER:
ipoib_create_debug_files(dev);
break;
My output show this:
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x9
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x2
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x9
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x2
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x9
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x2
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x9
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x2
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x6 <--
NETDEV_UNREGISTER { here we delete the debugfs entries }
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x6
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x6
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x6
So the 4 ports I have are closed only once. Hence no double free.
I am not sure why you see the double free. Please double check your
findings.
I am using the 4.9.9 upstream kernel because the commit "Merge tag
'for-next-dma_ops' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma"
cause MAD DMA mapping kernel panic on SPARC T7.
BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170328160251.GA26781-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
@ 2017-03-28 16:49 ` Mark Bloch
[not found] ` <e3a49c96-b248-7491-301d-faa47109aa41-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Mark Bloch @ 2017-03-28 16:49 UTC (permalink / raw)
To: Shamir Rabinovitch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On 28/03/2017 19:02, Shamir Rabinovitch wrote:
> On Tue, Mar 28, 2017 at 06:45:44PM +0300, Mark Bloch wrote:
>>>
>>> Hi Mark,
>>>
>>> v3 of this patch works fine on system that has CX3 with 2 ports and the
>>> below udev rules:
>>>
>>> # InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
>>> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
>>> ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
>>> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
>>> ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"
>>>
>>> On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
>>> chaos in the ipoib device names.
>>>
>>> The attached logs include the information collected when the openibd
>>> service was started and when it was stopped. You can have a look in the
>>> files and see what are the network events and how they are processed by
>>> the ipoib devices.
>>>
>>> I think it will answer your concerns.
>>>
>>> BR, Shamir
>>>
>>
>> I'm not saying it doesn't work, I'm saying works != works correctly.
>> We are calling ipoib_delete_debug_file too many times, it works by luck/chance.
>>
>> While testing the patch, I've encountered another issue, running:
>>
>> modprobe ib_ipoib
>> echo "0x0043" > /sys/class/net/ib0/create_child
>> modprobe -r ib_ipoib
>>
>> and then looking the at the debugfs dir:
>> [root@dev-r-vrt-175 ~]# ls /sys/kernel/debug/ipoib/
>> ib0.8043_mcg ib0.8043_pat1
>>
>> As you can see the the debugfs entries for the ib0 child weren't removed.
>> Also notice that after that, I can't load ib_ipoib
>> [root@dev-r-vrt-175 ~]# modprobe ib_ipoib
>> modprobe: ERROR: could not insert 'ib_ipoib': Cannot allocate memory
>>
>> The more interesting issue is, dmesg output has this:
>> [ 467.185609] ib0.8043: failed to create mcg debug file
>> [ 467.192551] ib0.8043: failed to create path debug file
>>
>> so maybe this is a debugfs bug?
>>
>> Sorry I can't look into it, I have some internal stuff I need to work on :/
>>
>> Mark.
>>
>
> Hi Mark,
>
> I am confused. Have you used v3 of the patch? If yes please add this
> print after you apply the patch and send me the output when you stop the
> openibd service:
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index c84b8ee..a2f43ff 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -118,12 +118,17 @@ static int ipoib_netdev_event(struct
> notifier_block *this,
> if (dev->netdev_ops->ndo_open != ipoib_open)
> return NOTIFY_DONE;
>
> + pr_err("%s: dev %p name %s event 0x%lx\n",
> + __func__, dev, dev->name, event);
> +
> switch (event) {
> case NETDEV_REGISTER:
> ipoib_create_debug_files(dev);
> break;
>
> My output show this:
>
> ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x9
> ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x2
> ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x9
> ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x2
> ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x9
> ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x2
> ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x9
> ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x2
> ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x6 <--
> NETDEV_UNREGISTER { here we delete the debugfs entries }
> ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x6
> ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x6
> ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x6
>
> So the 4 ports I have are closed only once. Hence no double free.
> I am not sure why you see the double free. Please double check your
> findings.
>
I agree that we call ipoib_delete_debug_files() from ipoib_netdev_event()
the right number of times, but we also call it in ipoib_dev_cleanup()
which means we call it too many times. Your V3 didn't remove that call.
> I am using the 4.9.9 upstream kernel because the commit "Merge tag
> 'for-next-dma_ops' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma"
> cause MAD DMA mapping kernel panic on SPARC T7.
>
> BR, Shamir
>
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170328091940.GA14058-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
@ 2017-03-28 17:05 ` Leon Romanovsky
[not found] ` <20170328170506.GI20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Leon Romanovsky @ 2017-03-28 17:05 UTC (permalink / raw)
To: Shamir Rabinovitch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
[-- Attachment #1: Type: text/plain, Size: 1901 bytes --]
On Tue, Mar 28, 2017 at 12:19:41PM +0300, Shamir Rabinovitch wrote:
> On Mon, Mar 27, 2017 at 11:17:14PM +0300, Shamir Rabinovitch wrote:
> >
> > > >
> > > > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > > > +static int ipoib_netdev_event(struct notifier_block *this,
> > > > + unsigned long event, void *ptr)
> > > > +{
> > > > + struct netdev_notifier_info *ni = ptr;
> > > > + struct net_device *dev = ni->dev;
> > > > +
> > > > + if (dev->netdev_ops->ndo_open != ipoib_open)
> > > > + return NOTIFY_DONE;
> > > > +
> > > > + switch (event) {
> > > > + case NETDEV_REGISTER:
> > > > + ipoib_create_debug_files(dev);
> > > > + break;
> > > > + case NETDEV_CHANGENAME:
> > > > + ipoib_debugfs_rename(dev);
> > >
> > > Why do we need explicit ipoib_debugfs_rename function?
> > > Will it work by simply calling ipoib_delete_debug_files
> > > and immediately after that ipoib_create_debug_files?
> > >
> > >
> > > > + break;
> > > > + case NETDEV_UNREGISTER:
> > > > + ipoib_delete_debug_files(dev);
> > > > + break;
> > > > + }
> > > > +
> > > > + return NOTIFY_DONE;
> > > > +}
> > > > +#endif
> > > > +
> >
> > Hi Leon,
> >
> > Good point.
> > I will have look on this idea and get back to you.
> >
> > BR, Shamir
>
> Hi Leon,
>
> It seems that the difference between using debugfs_rename and a
> combination of debugfs_create_file and debugfs_remove is mainly the
> atomicy of the rename operation with regard to the file system.
> debugfs_rename is atomic operation while the above combination is not.
> As result I see kernel panic when the rename operation interleave with
> the delete due to module unload. So after carefully considering what you
> suggest I think it might introduce unexpected issues.
Thank you for checking it.
There is one more thing which I noticed, you should check the return
values of debugfs_remove and destroy debugfs in case of failures.
>
> BR, Shamir
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <e3a49c96-b248-7491-301d-faa47109aa41-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2017-03-28 17:52 ` Shamir Rabinovitch
0 siblings, 0 replies; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-28 17:52 UTC (permalink / raw)
To: Mark Bloch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On Tue, Mar 28, 2017 at 07:49:08PM +0300, Mark Bloch wrote:
> I agree that we call ipoib_delete_debug_files() from ipoib_netdev_event()
> the right number of times, but we also call it in ipoib_dev_cleanup()
> which means we call it too many times. Your V3 didn't remove that call.
>
>
> Mark.
Thanks for the review Mark. Yes you are correct. Patch need fix.
BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170328170506.GI20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
@ 2017-03-29 10:20 ` Shamir Rabinovitch
[not found] ` <20170329102019.GA19012-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Shamir Rabinovitch @ 2017-03-29 10:20 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
On Tue, Mar 28, 2017 at 08:05:06PM +0300, Leon Romanovsky wrote:
>
> Thank you for checking it.
>
> There is one more thing which I noticed, you should check the return
> values of debugfs_remove and destroy debugfs in case of failures.
>
Hi Leon,
'debugfs_remove' prototype is "static inline void
debugfs_remove(struct dentry *dentry)".
If you typed the wrong function please let me know.
BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file
[not found] ` <20170329102019.GA19012-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
@ 2017-03-29 11:21 ` Leon Romanovsky
0 siblings, 0 replies; 13+ messages in thread
From: Leon Romanovsky @ 2017-03-29 11:21 UTC (permalink / raw)
To: Shamir Rabinovitch
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
dledford-H+wXaHxf7aLQT0dZR+AlfA,
vijay.ac.kumar-QHcLZuEGTsvQT0dZR+AlfA
[-- Attachment #1: Type: text/plain, Size: 930 bytes --]
On Wed, Mar 29, 2017 at 01:20:20PM +0300, Shamir Rabinovitch wrote:
> On Tue, Mar 28, 2017 at 08:05:06PM +0300, Leon Romanovsky wrote:
> >
> > Thank you for checking it.
> >
> > There is one more thing which I noticed, you should check the return
> > values of debugfs_remove and destroy debugfs in case of failures.
> >
>
> Hi Leon,
>
> 'debugfs_remove' prototype is "static inline void
> debugfs_remove(struct dentry *dentry)".
> If you typed the wrong function please let me know.
Sorry, it was my fault, I mistyped and intended to write debugfs_rename
which you used in previous version of your patch.
linux-rdma git:(master) git grep debugfs_rename
....
fs/debugfs/inode.c: * debugfs_rename - rename a file/directory in the debugfs filesystem
fs/debugfs/inode.c:struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
fs/debugfs/inode.c:EXPORT_SYMBOL_GPL(debugfs_rename);
....
>
> BR, Shamir
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2017-03-29 11:21 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-27 7:18 [PATCH v3] IB/IPoIB: ibX: failed to create mcg debug file Shamir Rabinovitch
[not found] ` <1490599139-12665-1-git-send-email-shamir.rabinovitch-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2017-03-27 15:06 ` Mark Bloch
[not found] ` <4058624b-a947-9635-76ca-482fd6a6bd95-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-03-27 20:11 ` Shamir Rabinovitch
[not found] ` <20170327201156.GA29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
2017-03-28 15:45 ` Mark Bloch
[not found] ` <a141c94a-63b7-491c-ce42-9b8cb08aeb93-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-03-28 16:02 ` Shamir Rabinovitch
[not found] ` <20170328160251.GA26781-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
2017-03-28 16:49 ` Mark Bloch
[not found] ` <e3a49c96-b248-7491-301d-faa47109aa41-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-03-28 17:52 ` Shamir Rabinovitch
2017-03-27 19:55 ` Leon Romanovsky
[not found] ` <20170327195500.GH20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-03-27 20:17 ` Shamir Rabinovitch
[not found] ` <20170327201714.GB29831-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
2017-03-28 9:19 ` Shamir Rabinovitch
[not found] ` <20170328091940.GA14058-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
2017-03-28 17:05 ` Leon Romanovsky
[not found] ` <20170328170506.GI20443-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-03-29 10:20 ` Shamir Rabinovitch
[not found] ` <20170329102019.GA19012-t9juWtktDCT52KUv/Ok+f8QLKKaP9WJ9VpNB7YpNyf8@public.gmane.org>
2017-03-29 11:21 ` Leon Romanovsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.