linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] IB/mad: Use ID allocator routines to allocate agent number
@ 2018-05-29  7:38 Hans Westgaard Ry
  2018-05-29  8:54 ` Leon Romanovsky
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-05-29  7:38 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, linux-rdma,
	linux-kernel

The agent TID is a 64 bit value split in two dwords.  The least
significant dword is the TID running counter. The most significant
dword is the agent number. In the CX-3 shared port model, the mlx4
driver uses the most significant byte of the agent number to store the
slave number, making agent numbers greater and equal to 2^24 (3 bytes)
unusable.  The current codebase uses a variable which is incremented
atomically for each new agent number giving too large agent numbers
over time.  The IDA set of functions are used instead of the simple
counter approach. This allows re-use of agent numbers. A sysctl
variable is also introduced, to control the max agent number.

The signature of the bug is a MAD layer that stops working and the
console is flooded with messages like:
 mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>

---
 drivers/infiniband/core/mad.c | 50 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b28452a55a08..adce6cd5fc41 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -41,6 +41,8 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/security.h>
+#include <linux/idr.h>
+#include <linux/sysctl.h>
 #include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
@@ -57,9 +59,27 @@ module_param_named(send_queue_size, mad_sendq_size, int, 0444);
 MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+/* Sysctl variable to set largest mad agent number */
+static u32 ib_mad_sysctl_min_client_id_max;
+static u32 ib_mad_sysctl_max_client_id_max;
+static u32 ib_mad_sysctl_client_id_max;
+static struct ctl_table_header *ib_mad_sysctl_hdr;
+
+static struct ctl_table ib_mad_sysctl_table[] = {
+	{
+		.procname       = "client_id_max",
+		.data           = &ib_mad_sysctl_client_id_max,
+		.maxlen         = sizeof(ib_mad_sysctl_client_id_max),
+		.mode           = 0644,
+		.proc_handler   = &proc_douintvec_minmax,
+		.extra1         = &ib_mad_sysctl_min_client_id_max,
+		.extra2         = &ib_mad_sysctl_max_client_id_max,
+	},
+	{ }
+};
 
 static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
+DEFINE_IDA(ib_mad_client_ids);
 
 /* Port list lock */
 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -212,6 +232,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	int ret2, qpn;
 	unsigned long flags;
 	u8 mgmt_class, vclass;
+	u32 ib_mad_client_id;
 
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
@@ -376,8 +397,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		goto error4;
 	}
 
+	ib_mad_client_id = ida_simple_get(&ib_mad_client_ids,
+					  0,
+					  ib_mad_sysctl_client_id_max,
+					  GFP_KERNEL);
+	if (ib_mad_client_id < 0) {
+		pr_err("Couldn't allocate agent tid; errcode: %#x\n",
+		       ib_mad_client_id);
+		ret = ERR_PTR(ib_mad_client_id);
+		goto error4;
+	}
+	mad_agent_priv->agent.hi_tid = ib_mad_client_id;
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
-	mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
 
 	/*
 	 * Make sure MAD registration (if supplied)
@@ -428,6 +459,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 error5:
 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
+
 error4:
 	kfree(reg_req);
 error3:
@@ -588,6 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	cancel_delayed_work(&mad_agent_priv->timed_work);
 
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
+	ida_simple_remove(&ib_mad_client_ids, mad_agent_priv->agent.hi_tid);
 	remove_mad_reg_req(mad_agent_priv);
 	list_del(&mad_agent_priv->agent_list);
 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
@@ -3341,10 +3375,22 @@ int ib_mad_init(void)
 		return -EINVAL;
 	}
 
+	ib_mad_sysctl_min_client_id_max = 1 << 10;
+	ib_mad_sysctl_max_client_id_max = 1 << 23;
+	ib_mad_sysctl_client_id_max     = 1 << 18;
+	ib_mad_sysctl_hdr = register_net_sysctl(&init_net, "net/ibmad",
+						ib_mad_sysctl_table);
+	if (!ib_mad_sysctl_hdr) {
+		pr_err("%s: register_net_sysctl failed\n",  __func__);
+		ib_mad_cleanup();
+		return -EINVAL;
+	}
 	return 0;
 }
 
 void ib_mad_cleanup(void)
 {
 	ib_unregister_client(&mad_client);
+	ida_destroy(&ib_mad_client_ids);
+	unregister_net_sysctl_table(ib_mad_sysctl_hdr);
 }
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29  7:38 [PATCH] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
@ 2018-05-29  8:54 ` Leon Romanovsky
  2018-05-29  9:54   ` Leon Romanovsky
  2018-05-29 15:49 ` Jason Gunthorpe
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Leon Romanovsky @ 2018-05-29  8:54 UTC (permalink / raw)
  To: Hans Westgaard Ry
  Cc: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, linux-rdma,
	linux-kernel

[-- Attachment #1: Type: text/plain, Size: 932 bytes --]

On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
> The agent TID is a 64 bit value split in two dwords.  The least
> significant dword is the TID running counter. The most significant
> dword is the agent number. In the CX-3 shared port model, the mlx4
> driver uses the most significant byte of the agent number to store the
> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> unusable.  The current codebase uses a variable which is incremented
> atomically for each new agent number giving too large agent numbers
> over time.  The IDA set of functions are used instead of the simple
> counter approach. This allows re-use of agent numbers. A sysctl
> variable is also introduced, to control the max agent number.

Why don't you simply limit this number per-driver? By default, any
variable is allowed and mlx4_ib will set something else.

What is the advantage of having sysctl?

Thanks

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29  8:54 ` Leon Romanovsky
@ 2018-05-29  9:54   ` Leon Romanovsky
  2018-05-30  8:18     ` jackm
  0 siblings, 1 reply; 24+ messages in thread
From: Leon Romanovsky @ 2018-05-29  9:54 UTC (permalink / raw)
  To: Hans Westgaard Ry
  Cc: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, linux-rdma,
	linux-kernel

[-- Attachment #1: Type: text/plain, Size: 4686 bytes --]

On Tue, May 29, 2018 at 11:54:59AM +0300, Leon Romanovsky wrote:
> On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
> > The agent TID is a 64 bit value split in two dwords.  The least
> > significant dword is the TID running counter. The most significant
> > dword is the agent number. In the CX-3 shared port model, the mlx4
> > driver uses the most significant byte of the agent number to store the
> > slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> > unusable.  The current codebase uses a variable which is incremented
> > atomically for each new agent number giving too large agent numbers
> > over time.  The IDA set of functions are used instead of the simple
> > counter approach. This allows re-use of agent numbers. A sysctl
> > variable is also introduced, to control the max agent number.
>
> Why don't you simply limit this number per-driver? By default, any
> variable is allowed and mlx4_ib will set something else.
>
> What is the advantage of having sysctl?

Anyway, it doesn't pass smoke test.

[  126.428407] RPC: Unregistered rdma transport module.
[  126.428513] RPC: Unregistered rdma backchannel transport module.
[  194.664081] IPv6: ADDRCONF(NETDEV_UP): ib0: link is not ready
[  209.068702] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070
[  209.068858] PGD 80000000341cf067 P4D 80000000341cf067 PUD 34188067 PMD 0
[  209.068941] Oops: 0002 [#1] SMP PTI
[  209.069006] Modules linked in: netconsole nfsv3 nfs fscache
mlx4_ib(-) mlx4_en mlx4_core devlink ib_ipoib rdma_ucm ib_ucm ib_uverbs
ib_umad rdma_cm ib_cm iw_cm ib_core dm_mirror dm_region_hash dm_log
dm_mod nfsd pcspkr i2c_piix4 auth_rpcgss nfs_acl lockd grace sunrpc
ip_tables ata_generic cirrus drm_kms_helper pata_acpi syscopyarea
sysfillrect sysimgblt fb_sys_fops ttm drm e1000 virtio_console i2c_core
serio_raw ata_piix floppy [last unloaded: mlxfw]
[  209.069312] CPU: 4 PID: 11048 Comm: modprobe Not tainted 4.17.0-rc7-2018-05-29_11-04-56_Hans_Westgaard_Ry__hans_westga #1
[  209.069413] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011
[  209.069486] RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40
[  209.069536] RSP: 0018:ffffc90000b4fd70 EFLAGS: 00010046
[  209.069591] RAX: 0000000000000000 RBX: 0000000000000246 RCX: ffffea0004d7ed00
[  209.069653] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000070
[  209.069717] RBP: 0000000000000000 R08: ffff88013446fc00 R09: 000000018010000f
[  209.069778] R10: 0000000000000001 R11: ffff88013446fc00 R12: 0000000000000070
[  209.069849] R13: 0000000000000202 R14: 0000000000000000 R15: 0000000000000000
[  209.069915] FS:  00007fc34caf7740(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000
[  209.069987] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  209.070043] CR2: 0000000000000070 CR3: 000000008853e000 CR4: 00000000000006e0
[  209.070128] Call Trace:
[  209.070189]  ib_unregister_mad_agent+0x2d/0x540 [ib_core]
[  209.070260]  ? __slab_free+0x9a/0x2d0
[  209.070332]  ib_agent_port_close+0xad/0xf0 [ib_core]
[  209.070396]  ib_mad_remove_device+0x59/0xb0 [ib_core]
[  209.070466]  ib_unregister_device+0xd4/0x180 [ib_core]
[  209.070537]  mlx4_ib_remove+0x67/0x1f0 [mlx4_ib]
[  209.070594]  mlx4_remove_device+0x93/0xa0 [mlx4_core]
[  209.070648]  mlx4_unregister_interface+0x37/0x90 [mlx4_core]
[  209.070705]  mlx4_ib_cleanup+0xc/0x4db [mlx4_ib]
[  209.072113]  __x64_sys_delete_module+0x15b/0x260
[  209.073567]  ? exit_to_usermode_loop+0x7f/0x95
[  209.074945]  do_syscall_64+0x48/0x100
[  209.076448]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  209.077799] RIP: 0033:0x7fc34bfe36b7
[  209.079122] RSP: 002b:00007ffc8ffa98b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[  209.080500] RAX: ffffffffffffffda RBX: 00000000013455c0 RCX: 00007fc34bfe36b7
[  209.081875] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 0000000001345628
[  209.083265] RBP: 0000000000000000 R08: 00007fc34c2a8060 R09: 00007fc34c053a40
[  209.084655] R10: 00007ffc8ffa9640 R11: 0000000000000206 R12: 0000000000000000
[  209.086028] R13: 0000000000000001 R14: 0000000001345628 R15: 0000000000000000
[  209.087416] Code: 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00
00 53 9c 58 0f 1f 44 00 00 48 89 c3 fa 66 0f 1f 44 00 00 31 c0 ba 01 00
00 00 <f0> 0f b1 17 85 c0 75 05 48 89 d8 5b c3 89 c6 e8 1e c9 81 ff eb
[  209.090262] RIP: _raw_spin_lock_irqsave+0x1e/0x40 RSP: ffffc90000b4fd70
[  209.091720] CR2: 0000000000000070
[  209.093137] ---[ end trace 7b8a6faa27868861 ]---
[  209.094546] Kernel panic - not syncing: Fatal exception
[  209.096910] Kernel Offset: disabled
[  209.098291] ---[ end Kernel panic - not syncing: Fatal exception ]---

Thanks

>
> Thanks



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29  7:38 [PATCH] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  2018-05-29  8:54 ` Leon Romanovsky
@ 2018-05-29 15:49 ` Jason Gunthorpe
  2018-05-29 16:16   ` Håkon Bugge
  2018-06-07 10:52 ` [PATCH v2 0/2] IB:mad " Hans Westgaard Ry
  2018-06-07 11:14 ` [PATCH v3 0/2] IB:mad " Hans Westgaard Ry
  3 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2018-05-29 15:49 UTC (permalink / raw)
  To: Hans Westgaard Ry
  Cc: Doug Ledford, Hakon Bugge, Jack Morgenstein, Daniel Jurgens,
	Parav Pandit, Pravin Shedge, linux-rdma, linux-kernel

On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
> The agent TID is a 64 bit value split in two dwords.  The least
> significant dword is the TID running counter. The most significant
> dword is the agent number. In the CX-3 shared port model, the mlx4
> driver uses the most significant byte of the agent number to store the
> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> unusable.

There is no reason for this to be an ida, just do something like

 mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id) & mad_agent_priv->ib_dev->tid_mask;

And have the driver set tid_mask to 3 bytes of 0xFF

And no sysctl.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29 15:49 ` Jason Gunthorpe
@ 2018-05-29 16:16   ` Håkon Bugge
  2018-05-29 16:40     ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Håkon Bugge @ 2018-05-29 16:16 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hans Westgaard Ry, Doug Ledford, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, OFED mailing list,
	linux-kernel



> On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
>> The agent TID is a 64 bit value split in two dwords.  The least
>> significant dword is the TID running counter. The most significant
>> dword is the agent number. In the CX-3 shared port model, the mlx4
>> driver uses the most significant byte of the agent number to store the
>> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
>> unusable.
> 
> There is no reason for this to be an ida, just do something like
> 
> mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id) & mad_agent_priv->ib_dev->tid_mask;
> 
> And have the driver set tid_mask to 3 bytes of 0xFF

The issue is that some of the mad agents are long-lived, so you will wrap and use the same TID twice.


Thxs, Håkon


> 
> And no sysctl.
> 
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29 16:16   ` Håkon Bugge
@ 2018-05-29 16:40     ` Jason Gunthorpe
  2018-05-30  7:32       ` Håkon Bugge
  2018-05-30  8:02       ` jackm
  0 siblings, 2 replies; 24+ messages in thread
From: Jason Gunthorpe @ 2018-05-29 16:40 UTC (permalink / raw)
  To: Håkon Bugge
  Cc: Hans Westgaard Ry, Doug Ledford, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, OFED mailing list,
	linux-kernel

On Tue, May 29, 2018 at 06:16:14PM +0200, Håkon Bugge wrote:
>
> > On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > 
> > On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
> >> The agent TID is a 64 bit value split in two dwords.  The least
> >> significant dword is the TID running counter. The most significant
> >> dword is the agent number. In the CX-3 shared port model, the mlx4
> >> driver uses the most significant byte of the agent number to store the
> >> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> >> unusable.
> > 
> > There is no reason for this to be an ida, just do something like
> > 
> > mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id) & mad_agent_priv->ib_dev->tid_mask;
> > 
> > And have the driver set tid_mask to 3 bytes of 0xFF
> 
> The issue is that some of the mad agents are long-lived, so you will
> wrap and use the same TID twice.

We already have that problem, and using ida is problematic because we
need to maximize the time between TID re-use, which ida isn't doing.

Preventing re-use seems like a seperate issue from limiting the range
to be compatible with mlx4.

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29 16:40     ` Jason Gunthorpe
@ 2018-05-30  7:32       ` Håkon Bugge
  2018-05-30 15:15         ` Jason Gunthorpe
  2018-05-30  8:02       ` jackm
  1 sibling, 1 reply; 24+ messages in thread
From: Håkon Bugge @ 2018-05-30  7:32 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hans Westgaard Ry, Doug Ledford, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, OFED mailing list,
	linux-kernel



> On 29 May 2018, at 18:40, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Tue, May 29, 2018 at 06:16:14PM +0200, Håkon Bugge wrote:
>> 
>>> On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
>>> 
>>> On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
>>>> The agent TID is a 64 bit value split in two dwords.  The least
>>>> significant dword is the TID running counter. The most significant
>>>> dword is the agent number. In the CX-3 shared port model, the mlx4
>>>> driver uses the most significant byte of the agent number to store the
>>>> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
>>>> unusable.
>>> 
>>> There is no reason for this to be an ida, just do something like
>>> 
>>> mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id) & mad_agent_priv->ib_dev->tid_mask;
>>> 
>>> And have the driver set tid_mask to 3 bytes of 0xFF
>> 
>> The issue is that some of the mad agents are long-lived, so you will
>> wrap and use the same TID twice.
> 
> We already have that problem, and using ida is problematic because we
> need to maximize the time between TID re-use, which ida isn't doing.

Initially, I thought that too, but consulted the spec which states:

C13-18.1.1: When initiating a new operation, MADHeader:TransactionID
shall be set to such a value that within that MAD the combination of TID,
SGID, and MgmtClass is different from that of any other currently executing
operation. []

"currently executing operation" that is.

But if timeouts etc. leads to MAD floating around in the fabric, its of course more robust to maximize the time between reuse.

Then idr_alloc_cyclic() can be used.

> Preventing re-use seems like a seperate issue from limiting the range
> to be compatible with mlx4.

Yes. But a v2 of Hans' patch using idr_alloc_cyclic() solves both issues.


Thxs, Håkon

> 
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29 16:40     ` Jason Gunthorpe
  2018-05-30  7:32       ` Håkon Bugge
@ 2018-05-30  8:02       ` jackm
  2018-05-30 12:22         ` Hans Westgaard Ry
  1 sibling, 1 reply; 24+ messages in thread
From: jackm @ 2018-05-30  8:02 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Håkon Bugge, Hans Westgaard Ry, Doug Ledford,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, OFED mailing list,
	linux-kernel, Leon Romanovsky

On Tue, 29 May 2018 10:40:32 -0600
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Tue, May 29, 2018 at 06:16:14PM +0200, Håkon Bugge wrote:
> >  
> > > On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > > 
> > > On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry
> > > wrote:  
> > >> The agent TID is a 64 bit value split in two dwords.  The least
> > >> significant dword is the TID running counter. The most
> > >> significant dword is the agent number. In the CX-3 shared port
> > >> model, the mlx4 driver uses the most significant byte of the
> > >> agent number to store the slave number, making agent numbers
> > >> greater and equal to 2^24 (3 bytes) unusable.  
> > > 
> > > There is no reason for this to be an ida, just do something like
> > > 
> > > mad_agent_priv->agent.hi_tid =
> > > atomic_inc_return(&ib_mad_client_id) &
> > > mad_agent_priv->ib_dev->tid_mask;
> > > 
> > > And have the driver set tid_mask to 3 bytes of 0xFF  
> > 
> > The issue is that some of the mad agents are long-lived, so you will
> > wrap and use the same TID twice.  
> 
> We already have that problem, and using ida is problematic because we
> need to maximize the time between TID re-use, which ida isn't doing.
> 
> Preventing re-use seems like a seperate issue from limiting the range
> to be compatible with mlx4.
> 

Preventing immediate re-use can be accomplished by judicious use of the
start argument (second argument) in the call to ida_simple_get (to
introduce hysteresis into the id allocations).

For example, can do something like:

static atomic_t ib_mad_client_id_min = ATOMIC_INIT(1);
...
        ib_mad_client_id = ida_simple_get(&ib_mad_client_ids,
                                          atomic_read(&ib_mad_client_id_min),
                                          ib_mad_sysctl_client_id_max,
                                          GFP_KERNEL);
....
        if (!(ib_mad_client_id % 1000) ||
            ib_mad_sysctl_client_id_max - ib_mad_client_id <= 1000)
		atomic_set(&ib_mad_client_id_min, 1);
	else
                atomic_set(&ib_mad_client_id_min, ib_mad_client_id + 1);

The above avoids immediate re-use of ids, and only searches for past
freed ids if the last allocated-id is zero mod 1000.

This is just suggestion -- will probably need some variation of the
above to handle what happens over time (i.e., to not depend on the
modulo operation to reset the search start to 1), to properly handle
how we deal with the start value when we are close to the allowed
client_id_max, and also to implement some sort of locking.

-Jack

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-29  9:54   ` Leon Romanovsky
@ 2018-05-30  8:18     ` jackm
  0 siblings, 0 replies; 24+ messages in thread
From: jackm @ 2018-05-30  8:18 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Hans Westgaard Ry, Doug Ledford, Jason Gunthorpe, Hakon Bugge,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, linux-rdma,
	linux-kernel

On Tue, 29 May 2018 12:54:45 +0300
Leon Romanovsky <leon@kernel.org> wrote:

> On Tue, May 29, 2018 at 11:54:59AM +0300, Leon Romanovsky wrote:
> > On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:  
> > > The agent TID is a 64 bit value split in two dwords.  The least
> > > significant dword is the TID running counter. The most significant
> > > dword is the agent number. In the CX-3 shared port model, the mlx4
> > > driver uses the most significant byte of the agent number to
> > > store the slave number, making agent numbers greater and equal to
> > > 2^24 (3 bytes) unusable.  The current codebase uses a variable
> > > which is incremented atomically for each new agent number giving
> > > too large agent numbers over time.  The IDA set of functions are
> > > used instead of the simple counter approach. This allows re-use
> > > of agent numbers. A sysctl variable is also introduced, to
> > > control the max agent number.  
> >
> > Why don't you simply limit this number per-driver? By default, any
> > variable is allowed and mlx4_ib will set something else.

It is much simpler to do things here -- the current allocation scheme
does have a wrap-around problem, so there is a good reason for using a
global allocator residing in the ib core.

> >
> > What is the advantage of having sysctl?  
> 
> Anyway, it doesn't pass smoke test.

REASON:  The start argument in ida_simple_get should be 1, not 0:
+       ib_mad_client_id = ida_simple_get(&ib_mad_client_ids,
+                                         1,
+                                         ib_mad_sysctl_client_id_max,
+                                         GFP_KERNEL);


The agent ID 0 is interpreted as -no agent-, is used by snoop agents,
and is never allocated.

The first allocated agent id is 1:
static atomic_t ib_mad_client_id_min = ATOMIC_INIT(0);
...
mad_agent_priv->agent.hi_tid =atomic_inc_return(&ib_mad_client_id);

If this is fixed, we do not get the panic.

However, please note Jason's feedback and my suggestion -- this
ida-based interface needs to avoid immediate re-use of freed ids.

 -Jack
> 
> [  126.428407] RPC: Unregistered rdma transport module.
> [  126.428513] RPC: Unregistered rdma backchannel transport module.
> [  194.664081] IPv6: ADDRCONF(NETDEV_UP): ib0: link is not ready
> [  209.068702] BUG: unable to handle kernel NULL pointer dereference
> at 0000000000000070 [  209.068858] PGD 80000000341cf067 P4D
> 80000000341cf067 PUD 34188067 PMD 0 [  209.068941] Oops: 0002 [#1]
> SMP PTI [  209.069006] Modules linked in: netconsole nfsv3 nfs fscache
> mlx4_ib(-) mlx4_en mlx4_core devlink ib_ipoib rdma_ucm ib_ucm
> ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core dm_mirror
> dm_region_hash dm_log dm_mod nfsd pcspkr i2c_piix4 auth_rpcgss
> nfs_acl lockd grace sunrpc ip_tables ata_generic cirrus
> drm_kms_helper pata_acpi syscopyarea sysfillrect sysimgblt
> fb_sys_fops ttm drm e1000 virtio_console i2c_core serio_raw ata_piix
> floppy [last unloaded: mlxfw] [  209.069312] CPU: 4 PID: 11048 Comm:
> modprobe Not tainted
> 4.17.0-rc7-2018-05-29_11-04-56_Hans_Westgaard_Ry__hans_westga #1
> [  209.069413] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011
> [  209.069486] RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40
> [  209.069536] RSP: 0018:ffffc90000b4fd70 EFLAGS: 00010046
> [  209.069591] RAX: 0000000000000000 RBX: 0000000000000246 RCX:
> ffffea0004d7ed00 [  209.069653] RDX: 0000000000000001 RSI:
> 0000000000000000 RDI: 0000000000000070 [  209.069717] RBP:
> 0000000000000000 R08: ffff88013446fc00 R09: 000000018010000f
> [  209.069778] R10: 0000000000000001 R11: ffff88013446fc00 R12:
> 0000000000000070 [  209.069849] R13: 0000000000000202 R14:
> 0000000000000000 R15: 0000000000000000 [  209.069915] FS:
> 00007fc34caf7740(0000) GS:ffff88013fd00000(0000)
> knlGS:0000000000000000 [  209.069987] CS:  0010 DS: 0000 ES: 0000
> CR0: 0000000080050033 [  209.070043] CR2: 0000000000000070 CR3:
> 000000008853e000 CR4: 00000000000006e0 [  209.070128] Call Trace:
> [  209.070189]  ib_unregister_mad_agent+0x2d/0x540 [ib_core]
> [  209.070260]  ? __slab_free+0x9a/0x2d0 [  209.070332]
> ib_agent_port_close+0xad/0xf0 [ib_core] [  209.070396]
> ib_mad_remove_device+0x59/0xb0 [ib_core] [  209.070466]
> ib_unregister_device+0xd4/0x180 [ib_core] [  209.070537]
> mlx4_ib_remove+0x67/0x1f0 [mlx4_ib] [  209.070594]
> mlx4_remove_device+0x93/0xa0 [mlx4_core] [  209.070648]
> mlx4_unregister_interface+0x37/0x90 [mlx4_core] [  209.070705]
> mlx4_ib_cleanup+0xc/0x4db [mlx4_ib] [  209.072113]
> __x64_sys_delete_module+0x15b/0x260 [  209.073567]  ?
> exit_to_usermode_loop+0x7f/0x95 [  209.074945]
> do_syscall_64+0x48/0x100 [  209.076448]
> entry_SYSCALL_64_after_hwframe+0x44/0xa9 [  209.077799] RIP:
> 0033:0x7fc34bfe36b7 [  209.079122] RSP: 002b:00007ffc8ffa98b8 EFLAGS:
> 00000206 ORIG_RAX: 00000000000000b0 [  209.080500] RAX:
> ffffffffffffffda RBX: 00000000013455c0 RCX: 00007fc34bfe36b7
> [  209.081875] RDX: 0000000000000000 RSI: 0000000000000800 RDI:
> 0000000001345628 [  209.083265] RBP: 0000000000000000 R08:
> 00007fc34c2a8060 R09: 00007fc34c053a40 [  209.084655] R10:
> 00007ffc8ffa9640 R11: 0000000000000206 R12: 0000000000000000
> [  209.086028] R13: 0000000000000001 R14: 0000000001345628 R15:
> 0000000000000000 [  209.087416] Code: 66 66 66 66 2e 0f 1f 84 00 00
> 00 00 00 0f 1f 44 00 00 53 9c 58 0f 1f 44 00 00 48 89 c3 fa 66 0f 1f
> 44 00 00 31 c0 ba 01 00 00 00 <f0> 0f b1 17 85 c0 75 05 48 89 d8 5b
> c3 89 c6 e8 1e c9 81 ff eb [  209.090262] RIP:
> _raw_spin_lock_irqsave+0x1e/0x40 RSP: ffffc90000b4fd70 [  209.091720]
> CR2: 0000000000000070 [  209.093137] ---[ end trace 7b8a6faa27868861
> ]--- [  209.094546] Kernel panic - not syncing: Fatal exception
> [  209.096910] Kernel Offset: disabled [  209.098291] ---[ end Kernel
> panic - not syncing: Fatal exception ]---
> 
> Thanks
> 
> >
> > Thanks  
> 
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30  8:02       ` jackm
@ 2018-05-30 12:22         ` Hans Westgaard Ry
  2018-05-30 15:10           ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-05-30 12:22 UTC (permalink / raw)
  To: jackm, Jason Gunthorpe
  Cc: Håkon Bugge, Doug Ledford, Daniel Jurgens, Parav Pandit,
	Pravin Shedge, OFED mailing list, linux-kernel, Leon Romanovsky



On 05/30/2018 10:02 AM, jackm wrote:
> On Tue, 29 May 2018 10:40:32 -0600
> Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
>> On Tue, May 29, 2018 at 06:16:14PM +0200, Håkon Bugge wrote:
>>>   
>>>> On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
>>>>
>>>> On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry
>>>> wrote:
>>>>> The agent TID is a 64 bit value split in two dwords.  The least
>>>>> significant dword is the TID running counter. The most
>>>>> significant dword is the agent number. In the CX-3 shared port
>>>>> model, the mlx4 driver uses the most significant byte of the
>>>>> agent number to store the slave number, making agent numbers
>>>>> greater and equal to 2^24 (3 bytes) unusable.
>>>> There is no reason for this to be an ida, just do something like
>>>>
>>>> mad_agent_priv->agent.hi_tid =
>>>> atomic_inc_return(&ib_mad_client_id) &
>>>> mad_agent_priv->ib_dev->tid_mask;
>>>>
>>>> And have the driver set tid_mask to 3 bytes of 0xFF
>>> The issue is that some of the mad agents are long-lived, so you will
>>> wrap and use the same TID twice.
>> We already have that problem, and using ida is problematic because we
>> need to maximize the time between TID re-use, which ida isn't doing.
>>
>> Preventing re-use seems like a seperate issue from limiting the range
>> to be compatible with mlx4.
>>
> Preventing immediate re-use can be accomplished by judicious use of the
> start argument (second argument) in the call to ida_simple_get (to
> introduce hysteresis into the id allocations).
>
> For example, can do something like:
>
> static atomic_t ib_mad_client_id_min = ATOMIC_INIT(1);
> ...
>          ib_mad_client_id = ida_simple_get(&ib_mad_client_ids,
>                                            atomic_read(&ib_mad_client_id_min),
>                                            ib_mad_sysctl_client_id_max,
>                                            GFP_KERNEL);
> ....
>          if (!(ib_mad_client_id % 1000) ||
>              ib_mad_sysctl_client_id_max - ib_mad_client_id <= 1000)
> 		atomic_set(&ib_mad_client_id_min, 1);
> 	else
>                  atomic_set(&ib_mad_client_id_min, ib_mad_client_id + 1);
>
> The above avoids immediate re-use of ids, and only searches for past
> freed ids if the last allocated-id is zero mod 1000.
>
> This is just suggestion -- will probably need some variation of the
> above to handle what happens over time (i.e., to not depend on the
> modulo operation to reset the search start to 1), to properly handle
> how we deal with the start value when we are close to the allowed
> client_id_max, and also to implement some sort of locking.
>
> -Jack
>
We came up with this code snippet which we think handles both preventing 
immediate re-use and too big/wrapping...

         max = mad_agent_priv->ib_dev->tid_max;
         start = atomic_inc_return(&start);
retry:
         tid = ida_simple_get(&ib_mad_client_ids, start, max, GFP_KERNEL);
         if (unlikely(tid == -ENOSPC))    {
                 spin_lock_irq_save();
                 tid = ida_simple_get(&ib_mad_client_ids, start, max, 
GFP_ATOMIC);
                 if (unlikely(tid == -ENOSPC))    {
                         atomic_set(&start, 1);
                         spin_unlock_irq_restore();
                         goto retry;
                 }
                 spin_unlock_irq_restore();
         }


Hans

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30 12:22         ` Hans Westgaard Ry
@ 2018-05-30 15:10           ` Jason Gunthorpe
  2018-05-30 20:07             ` Håkon Bugge
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2018-05-30 15:10 UTC (permalink / raw)
  To: Hans Westgaard Ry
  Cc: jackm, Håkon Bugge, Doug Ledford, Daniel Jurgens,
	Parav Pandit, Pravin Shedge, OFED mailing list, linux-kernel,
	Leon Romanovsky

On Wed, May 30, 2018 at 02:22:56PM +0200, Hans Westgaard Ry wrote:

> We came up with this code snippet which we think handles both preventing
> immediate re-use and too big/wrapping...

Isn't this basically the same as idr_alloc_cyclic ?

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30  7:32       ` Håkon Bugge
@ 2018-05-30 15:15         ` Jason Gunthorpe
  0 siblings, 0 replies; 24+ messages in thread
From: Jason Gunthorpe @ 2018-05-30 15:15 UTC (permalink / raw)
  To: Håkon Bugge
  Cc: Hans Westgaard Ry, Doug Ledford, Jack Morgenstein,
	Daniel Jurgens, Parav Pandit, Pravin Shedge, OFED mailing list,
	linux-kernel

On Wed, May 30, 2018 at 09:32:02AM +0200, Håkon Bugge wrote:
> 
> 
> > On 29 May 2018, at 18:40, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > 
> > On Tue, May 29, 2018 at 06:16:14PM +0200, Håkon Bugge wrote:
> >> 
> >>> On 29 May 2018, at 17:49, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >>> 
> >>> On Tue, May 29, 2018 at 09:38:08AM +0200, Hans Westgaard Ry wrote:
> >>>> The agent TID is a 64 bit value split in two dwords.  The least
> >>>> significant dword is the TID running counter. The most significant
> >>>> dword is the agent number. In the CX-3 shared port model, the mlx4
> >>>> driver uses the most significant byte of the agent number to store the
> >>>> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> >>>> unusable.
> >>> 
> >>> There is no reason for this to be an ida, just do something like
> >>> 
> >>> mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id) & mad_agent_priv->ib_dev->tid_mask;
> >>> 
> >>> And have the driver set tid_mask to 3 bytes of 0xFF
> >> 
> >> The issue is that some of the mad agents are long-lived, so you will
> >> wrap and use the same TID twice.
> > 
> > We already have that problem, and using ida is problematic because we
> > need to maximize the time between TID re-use, which ida isn't doing.
> 
> Initially, I thought that too, but consulted the spec which states:
> 
> C13-18.1.1: When initiating a new operation, MADHeader:TransactionID
> shall be set to such a value that within that MAD the combination of TID,
> SGID, and MgmtClass is different from that of any other currently executing
> operation. []
> 
> "currently executing operation" that is.

'currently executing operation' encompasses a wide range of behaviors
and it is not just 'completing the MAD at the local node'

You have to exceed all the timeouts before a single node can be
certain that the condition is satisfied.

This is why we just simply maximize the time between allocations, and
encourage the user providing the rest of the tid to do the same.

> > Preventing re-use seems like a seperate issue from limiting the range
> > to be compatible with mlx4.
> 
> Yes. But a v2 of Hans' patch using idr_alloc_cyclic() solves both issues.

That might work, but still have to get rid of the sysctl

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30 15:10           ` Jason Gunthorpe
@ 2018-05-30 20:07             ` Håkon Bugge
  2018-05-30 22:09               ` Jason Gunthorpe
  0 siblings, 1 reply; 24+ messages in thread
From: Håkon Bugge @ 2018-05-30 20:07 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hans Westgaard Ry, jackm, Doug Ledford, Daniel Jurgens,
	Parav Pandit, Pravin Shedge, OFED mailing list, linux-kernel,
	Leon Romanovsky



> On 30 May 2018, at 17:10, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Wed, May 30, 2018 at 02:22:56PM +0200, Hans Westgaard Ry wrote:
> 
>> We came up with this code snippet which we think handles both preventing
>> immediate re-use and too big/wrapping...
> 
> Isn't this basically the same as idr_alloc_cyclic ?

I draw my statement back. The idr_alloc_cyclic() is the family of idr's that associates a pointer with the bit. Hence, each bit is a bit + 64b.

That's why we ended up with Hans' pseudo code.


Thxs, Håkon


> 
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30 20:07             ` Håkon Bugge
@ 2018-05-30 22:09               ` Jason Gunthorpe
  2018-05-31 19:54                 ` Håkon Bugge
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Gunthorpe @ 2018-05-30 22:09 UTC (permalink / raw)
  To: Håkon Bugge
  Cc: Hans Westgaard Ry, jackm, Doug Ledford, Daniel Jurgens,
	Parav Pandit, Pravin Shedge, OFED mailing list, linux-kernel,
	Leon Romanovsky

On Wed, May 30, 2018 at 10:07:16PM +0200, Håkon Bugge wrote:
> 
> 
> > On 30 May 2018, at 17:10, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > 
> > On Wed, May 30, 2018 at 02:22:56PM +0200, Hans Westgaard Ry wrote:
> > 
> >> We came up with this code snippet which we think handles both preventing
> >> immediate re-use and too big/wrapping...
> > 
> > Isn't this basically the same as idr_alloc_cyclic ?
> 
> I draw my statement back. The idr_alloc_cyclic() is the family of idr's that associates a pointer with the bit. Hence, each bit is a bit + 64b.
> 
> That's why we ended up with Hans' pseudo code.

Okay, fair enough.

Is it worth adding a ida_get_new_cyclic for this?

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] IB/mad: Use ID allocator routines to allocate agent number
  2018-05-30 22:09               ` Jason Gunthorpe
@ 2018-05-31 19:54                 ` Håkon Bugge
  0 siblings, 0 replies; 24+ messages in thread
From: Håkon Bugge @ 2018-05-31 19:54 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hans Westgaard Ry, jackm, Doug Ledford, Daniel Jurgens,
	Parav Pandit, Pravin Shedge, OFED mailing list, linux-kernel,
	Leon Romanovsky



> On 31 May 2018, at 00:09, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Wed, May 30, 2018 at 10:07:16PM +0200, Håkon Bugge wrote:
>> 
>> 
>>> On 30 May 2018, at 17:10, Jason Gunthorpe <jgg@ziepe.ca> wrote:
>>> 
>>> On Wed, May 30, 2018 at 02:22:56PM +0200, Hans Westgaard Ry wrote:
>>> 
>>>> We came up with this code snippet which we think handles both preventing
>>>> immediate re-use and too big/wrapping...
>>> 
>>> Isn't this basically the same as idr_alloc_cyclic ?
>> 
>> I draw my statement back. The idr_alloc_cyclic() is the family of idr's that associates a pointer with the bit. Hence, each bit is a bit + 64b.
>> 
>> That's why we ended up with Hans' pseudo code.
> 
> Okay, fair enough.
> 
> Is it worth adding a ida_get_new_cyclic for this?

My initial reaction was "no", but then I found the same cyclic behaviour in drivers/net/ipvlan/ipvlan_main.c

Then in my opinion, it makes sense to create a ida_simple_get_cyclic()


Thxs, Håkon

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 0/2] IB:mad Use ID allocator routines to allocate agent number
  2018-05-29  7:38 [PATCH] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  2018-05-29  8:54 ` Leon Romanovsky
  2018-05-29 15:49 ` Jason Gunthorpe
@ 2018-06-07 10:52 ` Hans Westgaard Ry
  2018-06-07 10:52   ` [PATCH v2 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
  2018-06-07 10:52   ` [PATCH v2 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  2018-06-07 11:14 ` [PATCH v3 0/2] IB:mad " Hans Westgaard Ry
  3 siblings, 2 replies; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 10:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

v2:

Implement ida_get_simple_cyclic and use it when allocating ib_mad_client_ids.

Hans Westgaard Ry (2):
  idr: Add ida_simple_get_cyclic
  IB/mad: Use ID allocator routines to allocate agent number

 drivers/infiniband/core/mad.c | 24 +++++++++++++---
 include/linux/idr.h           |  8 ++++--
 lib/idr.c                     | 66 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 6 deletions(-)

-- 
2.14.3

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 1/2] idr: Add ida_simple_get_cyclic
  2018-06-07 10:52 ` [PATCH v2 0/2] IB:mad " Hans Westgaard Ry
@ 2018-06-07 10:52   ` Hans Westgaard Ry
  2018-06-07 10:52   ` [PATCH v2 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  1 sibling, 0 replies; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 10:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

Orabug: 25571450

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 include/linux/idr.h |  8 +++++--
 lib/idr.c           | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..f151ce89124a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -218,10 +218,12 @@ DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
 
 struct ida {
 	struct radix_tree_root	ida_rt;
+	unsigned int		ida_next;
 };
 
-#define IDA_INIT(name)	{						\
-	.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),	\
+#define IDA_INIT(name)	{						     \
+		.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \
+		.ida_next = 0,						     \
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
@@ -232,6 +234,8 @@ void ida_destroy(struct ida *ida);
 
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 		   gfp_t gfp_mask);
+int ida_simple_get_cyclic(struct ida *ida, unsigned int start, unsigned int end,
+			  gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
 static inline void ida_init(struct ida *ida)
diff --git a/lib/idr.c b/lib/idr.c
index 823b813f08f8..3374c3fa4027 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -599,6 +599,72 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 	return ret;
 }
 EXPORT_SYMBOL(ida_simple_get);
+/**
+ * ida_simple_get_cyclic - get a new id.
+ * @ida: the (initialized) ida.
+ * @start: the minimum id (inclusive, < 0x8000000)
+ * @end: the maximum id (exclusive, < 0x8000000 or 0)
+ * @gfp_mask: memory allocation flags
+ *
+ * Allocates an id in the range start <= id < end, or returns -ENOSPC.
+ * On memory allocation failure, returns -ENOMEM.
+ * The search for an unused id will start at the last id allocated and will
+ * wrap around to @start if no free ids are found before reaching @end.
+ *
+ * Compared to ida_get_new_above() this function does its own locking, and
+ * should be used unless there are special requirements.
+ *
+ * Use ida_simple_remove() to get rid of an id.
+ */
+int ida_simple_get_cyclic(struct ida *ida, unsigned int start, unsigned int end,
+		   gfp_t gfp_mask)
+{
+	int ret, id, next;
+	unsigned int max;
+	unsigned long flags;
+
+	WARN_ON((int)start < 0);
+	WARN_ON((int)end < 0);
+
+	if (end == 0) {
+		max = 0x80000000;
+	} else {
+		WARN_ON(end < start);
+		max = end - 1;
+	}
+
+again:
+	if (!ida_pre_get(ida, gfp_mask))
+		return -ENOMEM;
+
+	spin_lock_irqsave(&simple_ida_lock, flags);
+	next = ida->ida_next;
+	if (next < start || next >= end)
+		next = start;
+
+	ret = ida_get_new_above(ida, next, &id);
+	if (likely(!ret)) {
+		if (unlikely(id >= max)) {
+			ida_remove(ida, id);
+			if (next == start) {
+				ret = -ENOSPC;
+			} else {
+				ida->ida_next = start;
+				spin_unlock_irqrestore(&simple_ida_lock, flags);
+				goto again;
+			}
+		} else {
+			ida->ida_next = id + 1;
+			ret = id;
+		}
+	}
+	spin_unlock_irqrestore(&simple_ida_lock, flags);
+
+	if (unlikely(ret == -EAGAIN))
+		goto again;
+	return ret;
+}
+EXPORT_SYMBOL(ida_simple_get_cyclic);
 
 /**
  * ida_simple_remove - remove an allocated id.
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 2/2] IB/mad: Use ID allocator routines to allocate agent number
  2018-06-07 10:52 ` [PATCH v2 0/2] IB:mad " Hans Westgaard Ry
  2018-06-07 10:52   ` [PATCH v2 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
@ 2018-06-07 10:52   ` Hans Westgaard Ry
  1 sibling, 0 replies; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 10:52 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

The agent TID is a 64 bit value split in two dwords.  The least
significant dword is the TID running counter. The most significant
dword is the agent number. In the CX-3 shared port model, the mlx4
driver uses the most significant byte of the agent number to store the
slave number, making agent numbers greater and equal to 2^24 (3 bytes)
unusable.  The current codebase uses a variable which is incremented
atomically for each new agent number giving too large agent numbers
over time.  The IDA set of functions are used instead of the simple
counter approach. This allows re-use of agent numbers.

The signature of the bug is a MAD layer that stops working and the
console is flooded with messages like:
 mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0

Orabug: 25571450

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 drivers/infiniband/core/mad.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b28452a55a08..c01a2d63ffa2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/security.h>
+#include <linux/idr.h>
 #include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
@@ -59,8 +60,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
 static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
-
+static DEFINE_IDA(ib_mad_client_ids);
 /* Port list lock */
 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
@@ -212,7 +212,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	int ret2, qpn;
 	unsigned long flags;
 	u8 mgmt_class, vclass;
-
+	u32 ib_mad_client_id;
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
 	if (qpn == -1) {
@@ -375,9 +375,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		ret = ERR_PTR(ret2);
 		goto error4;
 	}
+	ib_mad_client_id = ida_simple_get_cyclic(&ib_mad_client_ids,
+						1,
+						BIT(24) - 1,
+						GFP_KERNEL);
+	if (ib_mad_client_id < 0) {
+		pr_err("Couldn't allocate agent tid; errcode: %#x\n",
+			ib_mad_client_id);
+		ret = ERR_PTR(ib_mad_client_id);
+		goto error4;
+	}
+	mad_agent_priv->agent.hi_tid = ib_mad_client_id;
 
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
-	mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
 
 	/*
 	 * Make sure MAD registration (if supplied)
@@ -428,6 +438,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 error5:
 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
+
 error4:
 	kfree(reg_req);
 error3:
@@ -576,6 +588,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 {
 	struct ib_mad_port_private *port_priv;
 	unsigned long flags;
+	u32 ib_mad_client_id;
 
 	/* Note that we could still be handling received MADs */
 
@@ -587,6 +600,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	port_priv = mad_agent_priv->qp_info->port_priv;
 	cancel_delayed_work(&mad_agent_priv->timed_work);
 
+	ib_mad_client_id = mad_agent_priv->agent.hi_tid;
+
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	remove_mad_reg_req(mad_agent_priv);
 	list_del(&mad_agent_priv->agent_list);
@@ -602,6 +617,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 
 	kfree(mad_agent_priv->reg_req);
 	kfree(mad_agent_priv);
+	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
 }
 
 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -3347,4 +3363,5 @@ int ib_mad_init(void)
 void ib_mad_cleanup(void)
 {
 	ib_unregister_client(&mad_client);
+	ida_destroy(&ib_mad_client_ids);
 }
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 0/2] IB:mad Use ID allocator routines to allocate agent number
  2018-05-29  7:38 [PATCH] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
                   ` (2 preceding siblings ...)
  2018-06-07 10:52 ` [PATCH v2 0/2] IB:mad " Hans Westgaard Ry
@ 2018-06-07 11:14 ` Hans Westgaard Ry
  2018-06-07 11:14   ` [PATCH v3 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
  2018-06-07 11:14   ` [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  3 siblings, 2 replies; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 11:14 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

v3:
- Removed Oracle internal bug number information
v2:

- Implement ida_get_simple_cyclic and use it when allocating
  ib_mad_client_ids.

Hans Westgaard Ry (2):
  idr: Add ida_simple_get_cyclic
  IB/mad: Use ID allocator routines to allocate agent number

 drivers/infiniband/core/mad.c | 24 +++++++++++++---
 include/linux/idr.h           |  8 ++++--
 lib/idr.c                     | 66 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 6 deletions(-)

-- 
2.14.3

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v3 1/2] idr: Add ida_simple_get_cyclic
  2018-06-07 11:14 ` [PATCH v3 0/2] IB:mad " Hans Westgaard Ry
@ 2018-06-07 11:14   ` Hans Westgaard Ry
  2018-06-07 18:50     ` Jason Gunthorpe
  2018-06-07 11:14   ` [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
  1 sibling, 1 reply; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 11:14 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 include/linux/idr.h |  8 +++++--
 lib/idr.c           | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..f151ce89124a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -218,10 +218,12 @@ DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
 
 struct ida {
 	struct radix_tree_root	ida_rt;
+	unsigned int		ida_next;
 };
 
-#define IDA_INIT(name)	{						\
-	.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),	\
+#define IDA_INIT(name)	{						     \
+		.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \
+		.ida_next = 0,						     \
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
@@ -232,6 +234,8 @@ void ida_destroy(struct ida *ida);
 
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 		   gfp_t gfp_mask);
+int ida_simple_get_cyclic(struct ida *ida, unsigned int start, unsigned int end,
+			  gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
 static inline void ida_init(struct ida *ida)
diff --git a/lib/idr.c b/lib/idr.c
index 823b813f08f8..3374c3fa4027 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -599,6 +599,72 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 	return ret;
 }
 EXPORT_SYMBOL(ida_simple_get);
+/**
+ * ida_simple_get_cyclic - get a new id.
+ * @ida: the (initialized) ida.
+ * @start: the minimum id (inclusive, < 0x8000000)
+ * @end: the maximum id (exclusive, < 0x8000000 or 0)
+ * @gfp_mask: memory allocation flags
+ *
+ * Allocates an id in the range start <= id < end, or returns -ENOSPC.
+ * On memory allocation failure, returns -ENOMEM.
+ * The search for an unused id will start at the last id allocated and will
+ * wrap around to @start if no free ids are found before reaching @end.
+ *
+ * Compared to ida_get_new_above() this function does its own locking, and
+ * should be used unless there are special requirements.
+ *
+ * Use ida_simple_remove() to get rid of an id.
+ */
+int ida_simple_get_cyclic(struct ida *ida, unsigned int start, unsigned int end,
+		   gfp_t gfp_mask)
+{
+	int ret, id, next;
+	unsigned int max;
+	unsigned long flags;
+
+	WARN_ON((int)start < 0);
+	WARN_ON((int)end < 0);
+
+	if (end == 0) {
+		max = 0x80000000;
+	} else {
+		WARN_ON(end < start);
+		max = end - 1;
+	}
+
+again:
+	if (!ida_pre_get(ida, gfp_mask))
+		return -ENOMEM;
+
+	spin_lock_irqsave(&simple_ida_lock, flags);
+	next = ida->ida_next;
+	if (next < start || next >= end)
+		next = start;
+
+	ret = ida_get_new_above(ida, next, &id);
+	if (likely(!ret)) {
+		if (unlikely(id >= max)) {
+			ida_remove(ida, id);
+			if (next == start) {
+				ret = -ENOSPC;
+			} else {
+				ida->ida_next = start;
+				spin_unlock_irqrestore(&simple_ida_lock, flags);
+				goto again;
+			}
+		} else {
+			ida->ida_next = id + 1;
+			ret = id;
+		}
+	}
+	spin_unlock_irqrestore(&simple_ida_lock, flags);
+
+	if (unlikely(ret == -EAGAIN))
+		goto again;
+	return ret;
+}
+EXPORT_SYMBOL(ida_simple_get_cyclic);
 
 /**
  * ida_simple_remove - remove an allocated id.
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number
  2018-06-07 11:14 ` [PATCH v3 0/2] IB:mad " Hans Westgaard Ry
  2018-06-07 11:14   ` [PATCH v3 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
@ 2018-06-07 11:14   ` Hans Westgaard Ry
  2018-06-07 15:37     ` Matthew Wilcox
  1 sibling, 1 reply; 24+ messages in thread
From: Hans Westgaard Ry @ 2018-06-07 11:14 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, Hakon Bugge, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Matthew Wilcox, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

The agent TID is a 64 bit value split in two dwords.  The least
significant dword is the TID running counter. The most significant
dword is the agent number. In the CX-3 shared port model, the mlx4
driver uses the most significant byte of the agent number to store the
slave number, making agent numbers greater and equal to 2^24 (3 bytes)
unusable.  The current codebase uses a variable which is incremented
atomically for each new agent number giving too large agent numbers
over time.  The IDA set of functions are used instead of the simple
counter approach. This allows re-use of agent numbers.

The signature of the bug is a MAD layer that stops working and the
console is flooded with messages like:
 mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 drivers/infiniband/core/mad.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b28452a55a08..c01a2d63ffa2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/security.h>
+#include <linux/idr.h>
 #include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
@@ -59,8 +60,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
 static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
-
+static DEFINE_IDA(ib_mad_client_ids);
 /* Port list lock */
 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
@@ -212,7 +212,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	int ret2, qpn;
 	unsigned long flags;
 	u8 mgmt_class, vclass;
-
+	u32 ib_mad_client_id;
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
 	if (qpn == -1) {
@@ -375,9 +375,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		ret = ERR_PTR(ret2);
 		goto error4;
 	}
+	ib_mad_client_id = ida_simple_get_cyclic(&ib_mad_client_ids,
+						1,
+						BIT(24) - 1,
+						GFP_KERNEL);
+	if (ib_mad_client_id < 0) {
+		pr_err("Couldn't allocate agent tid; errcode: %#x\n",
+			ib_mad_client_id);
+		ret = ERR_PTR(ib_mad_client_id);
+		goto error4;
+	}
+	mad_agent_priv->agent.hi_tid = ib_mad_client_id;
 
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
-	mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
 
 	/*
 	 * Make sure MAD registration (if supplied)
@@ -428,6 +438,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 error5:
 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
+	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
+
 error4:
 	kfree(reg_req);
 error3:
@@ -576,6 +588,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 {
 	struct ib_mad_port_private *port_priv;
 	unsigned long flags;
+	u32 ib_mad_client_id;
 
 	/* Note that we could still be handling received MADs */
 
@@ -587,6 +600,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	port_priv = mad_agent_priv->qp_info->port_priv;
 	cancel_delayed_work(&mad_agent_priv->timed_work);
 
+	ib_mad_client_id = mad_agent_priv->agent.hi_tid;
+
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	remove_mad_reg_req(mad_agent_priv);
 	list_del(&mad_agent_priv->agent_list);
@@ -602,6 +617,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 
 	kfree(mad_agent_priv->reg_req);
 	kfree(mad_agent_priv);
+	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
 }
 
 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -3347,4 +3363,5 @@ int ib_mad_init(void)
 void ib_mad_cleanup(void)
 {
 	ib_unregister_client(&mad_client);
+	ida_destroy(&ib_mad_client_ids);
 }
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* RE: [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number
  2018-06-07 11:14   ` [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
@ 2018-06-07 15:37     ` Matthew Wilcox
  2018-06-07 17:59       ` Håkon Bugge
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-06-07 15:37 UTC (permalink / raw)
  To: Hans Westgaard Ry, Doug Ledford, Jason Gunthorpe, Hakon Bugge,
	Parav Pandit, Jack Morgenstein, Pravin Shedge, Andrew Morton,
	Jeff Layton, Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes,
	Mel Gorman, linux-rdma, linux-kernel

Why do you need the ID to increment like this?  Why can't you just use a unique ID?

> -----Original Message-----
> From: Hans Westgaard Ry [mailto:hans.westgaard.ry@oracle.com]
> Sent: Thursday, June 7, 2018 7:15 AM
> To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe <jgg@ziepe.ca>;
> Hakon Bugge <haakon.bugge@oracle.com>; Parav Pandit
> <parav@mellanox.com>; Jack Morgenstein <jackm@dev.mellanox.co.il>; Pravin
> Shedge <pravin.shedge4linux@gmail.com>; Matthew Wilcox
> <mawilcox@microsoft.com>; Andrew Morton <akpm@linux-foundation.org>;
> Jeff Layton <jlayton@kernel.org>; Wei Wang <wei.w.wang@intel.com>; Chris
> Mi <chrism@mellanox.com>; Eric Biggers <ebiggers@google.com>; Rasmus
> Villemoes <linux@rasmusvillemoes.dk>; Mel Gorman
> <mgorman@techsingularity.net>; linux-rdma@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent
> number
> 
> The agent TID is a 64 bit value split in two dwords.  The least
> significant dword is the TID running counter. The most significant
> dword is the agent number. In the CX-3 shared port model, the mlx4
> driver uses the most significant byte of the agent number to store the
> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
> unusable.  The current codebase uses a variable which is incremented
> atomically for each new agent number giving too large agent numbers
> over time.  The IDA set of functions are used instead of the simple
> counter approach. This allows re-use of agent numbers.
> 
> The signature of the bug is a MAD layer that stops working and the
> console is flooded with messages like:
>  mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0
> 
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
>  drivers/infiniband/core/mad.c | 25 +++++++++++++++++++++----
>  1 file changed, 21 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
> index b28452a55a08..c01a2d63ffa2 100644
> --- a/drivers/infiniband/core/mad.c
> +++ b/drivers/infiniband/core/mad.c
> @@ -41,6 +41,7 @@
>  #include <linux/slab.h>
>  #include <linux/module.h>
>  #include <linux/security.h>
> +#include <linux/idr.h>
>  #include <rdma/ib_cache.h>
> 
>  #include "mad_priv.h"
> @@ -59,8 +60,7 @@ module_param_named(recv_queue_size,
> mad_recvq_size, int, 0444);
>  MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of
> work requests");
> 
>  static struct list_head ib_mad_port_list;
> -static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
> -
> +static DEFINE_IDA(ib_mad_client_ids);
>  /* Port list lock */
>  static DEFINE_SPINLOCK(ib_mad_port_list_lock);
> 
> @@ -212,7 +212,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct
> ib_device *device,
>  	int ret2, qpn;
>  	unsigned long flags;
>  	u8 mgmt_class, vclass;
> -
> +	u32 ib_mad_client_id;
>  	/* Validate parameters */
>  	qpn = get_spl_qp_index(qp_type);
>  	if (qpn == -1) {
> @@ -375,9 +375,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct
> ib_device *device,
>  		ret = ERR_PTR(ret2);
>  		goto error4;
>  	}
> +	ib_mad_client_id = ida_simple_get_cyclic(&ib_mad_client_ids,
> +						1,
> +						BIT(24) - 1,
> +						GFP_KERNEL);
> +	if (ib_mad_client_id < 0) {
> +		pr_err("Couldn't allocate agent tid; errcode: %#x\n",
> +			ib_mad_client_id);
> +		ret = ERR_PTR(ib_mad_client_id);
> +		goto error4;
> +	}
> +	mad_agent_priv->agent.hi_tid = ib_mad_client_id;
> 
>  	spin_lock_irqsave(&port_priv->reg_lock, flags);
> -	mad_agent_priv->agent.hi_tid =
> atomic_inc_return(&ib_mad_client_id);
> 
>  	/*
>  	 * Make sure MAD registration (if supplied)
> @@ -428,6 +438,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct
> ib_device *device,
>  error5:
>  	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
>  	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
> +	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
> +
>  error4:
>  	kfree(reg_req);
>  error3:
> @@ -576,6 +588,7 @@ static void unregister_mad_agent(struct
> ib_mad_agent_private *mad_agent_priv)
>  {
>  	struct ib_mad_port_private *port_priv;
>  	unsigned long flags;
> +	u32 ib_mad_client_id;
> 
>  	/* Note that we could still be handling received MADs */
> 
> @@ -587,6 +600,8 @@ static void unregister_mad_agent(struct
> ib_mad_agent_private *mad_agent_priv)
>  	port_priv = mad_agent_priv->qp_info->port_priv;
>  	cancel_delayed_work(&mad_agent_priv->timed_work);
> 
> +	ib_mad_client_id = mad_agent_priv->agent.hi_tid;
> +
>  	spin_lock_irqsave(&port_priv->reg_lock, flags);
>  	remove_mad_reg_req(mad_agent_priv);
>  	list_del(&mad_agent_priv->agent_list);
> @@ -602,6 +617,7 @@ static void unregister_mad_agent(struct
> ib_mad_agent_private *mad_agent_priv)
> 
>  	kfree(mad_agent_priv->reg_req);
>  	kfree(mad_agent_priv);
> +	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
>  }
> 
>  static void unregister_mad_snoop(struct ib_mad_snoop_private
> *mad_snoop_priv)
> @@ -3347,4 +3363,5 @@ int ib_mad_init(void)
>  void ib_mad_cleanup(void)
>  {
>  	ib_unregister_client(&mad_client);
> +	ida_destroy(&ib_mad_client_ids);
>  }
> --
> 2.14.3


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number
  2018-06-07 15:37     ` Matthew Wilcox
@ 2018-06-07 17:59       ` Håkon Bugge
  0 siblings, 0 replies; 24+ messages in thread
From: Håkon Bugge @ 2018-06-07 17:59 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Hans Westgaard Ry, Doug Ledford, Jason Gunthorpe, Parav Pandit,
	Jack Morgenstein, Pravin Shedge, Andrew Morton, Jeff Layton,
	Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes, Mel Gorman,
	OFED mailing list, linux-kernel



> On 7 Jun 2018, at 17:37, Matthew Wilcox <mawilcox@microsoft.com> wrote:
> 
> Why do you need the ID to increment like this?  Why can't you just use a unique ID?

Hans first patch did that, but it was "beaten" up. Turns out, MAD packets can be hiding and lingering in the fabric, hence, when an ID is released after, lets say a timeout, we want to maximize the time until its reuse.


Thxs, Håkon

> 
>> -----Original Message-----
>> From: Hans Westgaard Ry [mailto:hans.westgaard.ry@oracle.com]
>> Sent: Thursday, June 7, 2018 7:15 AM
>> To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe <jgg@ziepe.ca>;
>> Hakon Bugge <haakon.bugge@oracle.com>; Parav Pandit
>> <parav@mellanox.com>; Jack Morgenstein <jackm@dev.mellanox.co.il>; Pravin
>> Shedge <pravin.shedge4linux@gmail.com>; Matthew Wilcox
>> <mawilcox@microsoft.com>; Andrew Morton <akpm@linux-foundation.org>;
>> Jeff Layton <jlayton@kernel.org>; Wei Wang <wei.w.wang@intel.com>; Chris
>> Mi <chrism@mellanox.com>; Eric Biggers <ebiggers@google.com>; Rasmus
>> Villemoes <linux@rasmusvillemoes.dk>; Mel Gorman
>> <mgorman@techsingularity.net>; linux-rdma@vger.kernel.org; linux-
>> kernel@vger.kernel.org
>> Subject: [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent
>> number
>> 
>> The agent TID is a 64 bit value split in two dwords.  The least
>> significant dword is the TID running counter. The most significant
>> dword is the agent number. In the CX-3 shared port model, the mlx4
>> driver uses the most significant byte of the agent number to store the
>> slave number, making agent numbers greater and equal to 2^24 (3 bytes)
>> unusable.  The current codebase uses a variable which is incremented
>> atomically for each new agent number giving too large agent numbers
>> over time.  The IDA set of functions are used instead of the simple
>> counter approach. This allows re-use of agent numbers.
>> 
>> The signature of the bug is a MAD layer that stops working and the
>> console is flooded with messages like:
>> mlx4_ib: egress mad has non-null tid msb:1 class:4 slave:0
>> 
>> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
>> ---
>> drivers/infiniband/core/mad.c | 25 +++++++++++++++++++++----
>> 1 file changed, 21 insertions(+), 4 deletions(-)
>> 
>> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
>> index b28452a55a08..c01a2d63ffa2 100644
>> --- a/drivers/infiniband/core/mad.c
>> +++ b/drivers/infiniband/core/mad.c
>> @@ -41,6 +41,7 @@
>> #include <linux/slab.h>
>> #include <linux/module.h>
>> #include <linux/security.h>
>> +#include <linux/idr.h>
>> #include <rdma/ib_cache.h>
>> 
>> #include "mad_priv.h"
>> @@ -59,8 +60,7 @@ module_param_named(recv_queue_size,
>> mad_recvq_size, int, 0444);
>> MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of
>> work requests");
>> 
>> static struct list_head ib_mad_port_list;
>> -static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
>> -
>> +static DEFINE_IDA(ib_mad_client_ids);
>> /* Port list lock */
>> static DEFINE_SPINLOCK(ib_mad_port_list_lock);
>> 
>> @@ -212,7 +212,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct
>> ib_device *device,
>> 	int ret2, qpn;
>> 	unsigned long flags;
>> 	u8 mgmt_class, vclass;
>> -
>> +	u32 ib_mad_client_id;
>> 	/* Validate parameters */
>> 	qpn = get_spl_qp_index(qp_type);
>> 	if (qpn == -1) {
>> @@ -375,9 +375,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct
>> ib_device *device,
>> 		ret = ERR_PTR(ret2);
>> 		goto error4;
>> 	}
>> +	ib_mad_client_id = ida_simple_get_cyclic(&ib_mad_client_ids,
>> +						1,
>> +						BIT(24) - 1,
>> +						GFP_KERNEL);
>> +	if (ib_mad_client_id < 0) {
>> +		pr_err("Couldn't allocate agent tid; errcode: %#x\n",
>> +			ib_mad_client_id);
>> +		ret = ERR_PTR(ib_mad_client_id);
>> +		goto error4;
>> +	}
>> +	mad_agent_priv->agent.hi_tid = ib_mad_client_id;
>> 
>> 	spin_lock_irqsave(&port_priv->reg_lock, flags);
>> -	mad_agent_priv->agent.hi_tid =
>> atomic_inc_return(&ib_mad_client_id);
>> 
>> 	/*
>> 	 * Make sure MAD registration (if supplied)
>> @@ -428,6 +438,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct
>> ib_device *device,
>> error5:
>> 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
>> 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
>> +	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
>> +
>> error4:
>> 	kfree(reg_req);
>> error3:
>> @@ -576,6 +588,7 @@ static void unregister_mad_agent(struct
>> ib_mad_agent_private *mad_agent_priv)
>> {
>> 	struct ib_mad_port_private *port_priv;
>> 	unsigned long flags;
>> +	u32 ib_mad_client_id;
>> 
>> 	/* Note that we could still be handling received MADs */
>> 
>> @@ -587,6 +600,8 @@ static void unregister_mad_agent(struct
>> ib_mad_agent_private *mad_agent_priv)
>> 	port_priv = mad_agent_priv->qp_info->port_priv;
>> 	cancel_delayed_work(&mad_agent_priv->timed_work);
>> 
>> +	ib_mad_client_id = mad_agent_priv->agent.hi_tid;
>> +
>> 	spin_lock_irqsave(&port_priv->reg_lock, flags);
>> 	remove_mad_reg_req(mad_agent_priv);
>> 	list_del(&mad_agent_priv->agent_list);
>> @@ -602,6 +617,7 @@ static void unregister_mad_agent(struct
>> ib_mad_agent_private *mad_agent_priv)
>> 
>> 	kfree(mad_agent_priv->reg_req);
>> 	kfree(mad_agent_priv);
>> +	ida_simple_remove(&ib_mad_client_ids, ib_mad_client_id);
>> }
>> 
>> static void unregister_mad_snoop(struct ib_mad_snoop_private
>> *mad_snoop_priv)
>> @@ -3347,4 +3363,5 @@ int ib_mad_init(void)
>> void ib_mad_cleanup(void)
>> {
>> 	ib_unregister_client(&mad_client);
>> +	ida_destroy(&ib_mad_client_ids);
>> }
>> --
>> 2.14.3
> 
> N‹§ēæėrļ›yúčšØbēXŽķĮ§vØ^–)Þš{.nĮ+‰·ĨŠ{ą­ŲšŠ{ayš\x1dʇڙë,j\a­ĒfĢĒ·hš‹āzđ\x1eŪwĨĒļ\fĒ·Ķj:+v‰ĻŠwčjØmķŸĸū\aŦ‘ęįzZ+ƒųšŽŠÝĒj"ú!

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 1/2] idr: Add ida_simple_get_cyclic
  2018-06-07 11:14   ` [PATCH v3 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
@ 2018-06-07 18:50     ` Jason Gunthorpe
  0 siblings, 0 replies; 24+ messages in thread
From: Jason Gunthorpe @ 2018-06-07 18:50 UTC (permalink / raw)
  To: Hans Westgaard Ry
  Cc: Doug Ledford, Hakon Bugge, Parav Pandit, Jack Morgenstein,
	Pravin Shedge, Matthew Wilcox, Andrew Morton, Jeff Layton,
	Wei Wang, Chris Mi, Eric Biggers, Rasmus Villemoes, Mel Gorman,
	linux-rdma, linux-kernel

On Thu, Jun 07, 2018 at 01:14:34PM +0200, Hans Westgaard Ry wrote:
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
>  include/linux/idr.h |  8 +++++--
>  lib/idr.c           | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 72 insertions(+), 2 deletions(-)

If you are going to do this you should convert the other places that
open coded this as well..

Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2018-06-07 18:50 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-29  7:38 [PATCH] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
2018-05-29  8:54 ` Leon Romanovsky
2018-05-29  9:54   ` Leon Romanovsky
2018-05-30  8:18     ` jackm
2018-05-29 15:49 ` Jason Gunthorpe
2018-05-29 16:16   ` Håkon Bugge
2018-05-29 16:40     ` Jason Gunthorpe
2018-05-30  7:32       ` Håkon Bugge
2018-05-30 15:15         ` Jason Gunthorpe
2018-05-30  8:02       ` jackm
2018-05-30 12:22         ` Hans Westgaard Ry
2018-05-30 15:10           ` Jason Gunthorpe
2018-05-30 20:07             ` Håkon Bugge
2018-05-30 22:09               ` Jason Gunthorpe
2018-05-31 19:54                 ` Håkon Bugge
2018-06-07 10:52 ` [PATCH v2 0/2] IB:mad " Hans Westgaard Ry
2018-06-07 10:52   ` [PATCH v2 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
2018-06-07 10:52   ` [PATCH v2 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
2018-06-07 11:14 ` [PATCH v3 0/2] IB:mad " Hans Westgaard Ry
2018-06-07 11:14   ` [PATCH v3 1/2] idr: Add ida_simple_get_cyclic Hans Westgaard Ry
2018-06-07 18:50     ` Jason Gunthorpe
2018-06-07 11:14   ` [PATCH v3 2/2] IB/mad: Use ID allocator routines to allocate agent number Hans Westgaard Ry
2018-06-07 15:37     ` Matthew Wilcox
2018-06-07 17:59       ` Håkon Bugge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).