All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
@ 2021-03-25 13:05 Håkon Bugge
  2021-03-30 23:12 ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Håkon Bugge @ 2021-03-25 13:05 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe, linux-rdma

Introduce the ability for both user-space and kernel ULPs to adjust
the minimum RNR Retry timer. The INIT -> RTR transition executed by
RDMA CM will be used for this adjustment. This avoids an additional
ib_modify_qp() call.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
---
 drivers/infiniband/core/cma.c      | 23 +++++++++++++++++++++++
 drivers/infiniband/core/cma_priv.h |  2 ++
 drivers/infiniband/core/ucma.c     |  7 +++++++
 include/rdma/rdma_cm.h             |  2 ++
 include/uapi/rdma/rdma_user_cm.h   |  3 ++-
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9409651..f50dc30 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -852,6 +852,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
 	id_priv->id.qp_type = qp_type;
 	id_priv->tos_set = false;
 	id_priv->timeout_set = false;
+	id_priv->min_rnr_timer_set = false;
 	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
@@ -1141,6 +1142,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
 		qp_attr->timeout = id_priv->timeout;
 
+	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
+
 	return ret;
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -2615,6 +2619,25 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
 }
 EXPORT_SYMBOL(rdma_set_ack_timeout);
 
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+	struct rdma_id_private *id_priv;
+
+	/* It is a five-bit value */
+	if (min_rnr_timer & 0xe0)
+		return -EINVAL;
+
+	if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT)
+		return -EINVAL;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	id_priv->min_rnr_timer = min_rnr_timer;
+	id_priv->min_rnr_timer_set = true;
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
+
 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
 			      void *context)
 {
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index caece96..bf83d32 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -86,9 +86,11 @@ struct rdma_id_private {
 	u8			tos;
 	u8			tos_set:1;
 	u8                      timeout_set:1;
+	u8			min_rnr_timer_set:1;
 	u8			reuseaddr;
 	u8			afonly;
 	u8			timeout;
+	u8			min_rnr_timer;
 	enum ib_gid_type	gid_type;
 
 	/*
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index da2512c..f183ace 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1282,6 +1282,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
 		}
 		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
 		break;
+	case RDMA_OPTION_ID_RNR_RETRY_TIMER:
+		if (optlen != sizeof(u8)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = rdma_set_min_rnr_timer(ctx->cm_id, *((u8 *)optval));
+		break;
 	default:
 		ret = -ENOSYS;
 	}
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 32a67af..8b0f66e 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -331,6 +331,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
+
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer);
  /**
  * rdma_get_service_id - Return the IB service ID for a specified address.
  * @id: Communication identifier associated with the address.
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index ed5a514..13b46af 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -313,7 +313,8 @@ enum {
 	RDMA_OPTION_ID_TOS	 = 0,
 	RDMA_OPTION_ID_REUSEADDR = 1,
 	RDMA_OPTION_ID_AFONLY	 = 2,
-	RDMA_OPTION_ID_ACK_TIMEOUT = 3
+	RDMA_OPTION_ID_ACK_TIMEOUT = 3,
+	RDMA_OPTION_ID_RNR_RETRY_TIMER = 4,
 };
 
 enum {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-25 13:05 [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer() Håkon Bugge
@ 2021-03-30 23:12 ` Jason Gunthorpe
  2021-03-31 10:38   ` Haakon Bugge
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-30 23:12 UTC (permalink / raw)
  To: Håkon Bugge; +Cc: Doug Ledford, linux-rdma

On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
> Introduce the ability for both user-space and kernel ULPs to adjust
> the minimum RNR Retry timer. The INIT -> RTR transition executed by
> RDMA CM will be used for this adjustment. This avoids an additional
> ib_modify_qp() call.

Can't userspace override the ibv_modify_qp() call the librdmacm wants
to make to do this?

You'll need to make the rdma-core patches before this can go ahead

Thanks,
Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-30 23:12 ` Jason Gunthorpe
@ 2021-03-31 10:38   ` Haakon Bugge
  2021-03-31 12:00     ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 10:38 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
>> Introduce the ability for both user-space and kernel ULPs to adjust
>> the minimum RNR Retry timer. The INIT -> RTR transition executed by
>> RDMA CM will be used for this adjustment. This avoids an additional
>> ib_modify_qp() call.
> 
> Can't userspace override the ibv_modify_qp() call the librdmacm wants
> to make to do this?

Not sure I understand. The point is, that user-land which intends to set said timer, can do so without an additional ibv_modify_qp() call. May be I should have added:

Shamelessly-inspired-by: 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack tos_set")

> You'll need to make the rdma-core patches before this can go ahead

Ooh, I thought it was the other way around. Will do.

I also intend to send an RDS patch, so there will be at least two users of this.


Thxs, Håkon



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 10:38   ` Haakon Bugge
@ 2021-03-31 12:00     ` Jason Gunthorpe
  2021-03-31 12:58       ` Haakon Bugge
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 12:00 UTC (permalink / raw)
  To: Haakon Bugge; +Cc: Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 10:38:02AM +0000, Haakon Bugge wrote:
> 
> 
> > On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > 
> > On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
> >> Introduce the ability for both user-space and kernel ULPs to adjust
> >> the minimum RNR Retry timer. The INIT -> RTR transition executed by
> >> RDMA CM will be used for this adjustment. This avoids an additional
> >> ib_modify_qp() call.
> > 
> > Can't userspace override the ibv_modify_qp() call the librdmacm wants
> > to make to do this?
> 
> Not sure I understand. The point is, that user-land which intends to
> set said timer, can do so without an additional ibv_modify_qp()
> call. May be I should have added:

IIRC in userspace the application has the option to call
ibv_modify_qp() so it can just change it before it makes the call?

> Shamelessly-inspired-by: 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack tos_set")

Hmm..

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 12:00     ` Jason Gunthorpe
@ 2021-03-31 12:58       ` Haakon Bugge
  2021-03-31 13:15         ` Jason Gunthorpe
  2021-03-31 13:25         ` Haakon Bugge
  0 siblings, 2 replies; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 12:58 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 14:00, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Mar 31, 2021 at 10:38:02AM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> 
>>> On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
>>>> Introduce the ability for both user-space and kernel ULPs to adjust
>>>> the minimum RNR Retry timer. The INIT -> RTR transition executed by
>>>> RDMA CM will be used for this adjustment. This avoids an additional
>>>> ib_modify_qp() call.
>>> 
>>> Can't userspace override the ibv_modify_qp() call the librdmacm wants
>>> to make to do this?
>> 
>> Not sure I understand. The point is, that user-land which intends to
>> set said timer, can do so without an additional ibv_modify_qp()
>> call. May be I should have added:
> 
> IIRC in userspace the application has the option to call
> ibv_modify_qp() so it can just change it before it makes the call?

User-space can call ibv_modify_qp, but that call is inherently expensive on some HCA implementations running virtualized. So this commit enables user-space to use rdma_set_option() to set information in the kernel's cm_id such that the required INIT -> RTR transition takes care of the RNR Retry timer value as well - with an additional modify_qp.

Thxs, Håkon

>> Shamelessly-inspired-by: 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack tos_set")
> 
> Hmm..
> 
> Jason


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 12:58       ` Haakon Bugge
@ 2021-03-31 13:15         ` Jason Gunthorpe
  2021-03-31 13:34           ` Haakon Bugge
  2021-03-31 13:25         ` Haakon Bugge
  1 sibling, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 13:15 UTC (permalink / raw)
  To: Haakon Bugge; +Cc: Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 12:58:41PM +0000, Haakon Bugge wrote:
> 
> 
> > On 31 Mar 2021, at 14:00, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > 
> > On Wed, Mar 31, 2021 at 10:38:02AM +0000, Haakon Bugge wrote:
> >> 
> >> 
> >>> On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
> >>> 
> >>> On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
> >>>> Introduce the ability for both user-space and kernel ULPs to adjust
> >>>> the minimum RNR Retry timer. The INIT -> RTR transition executed by
> >>>> RDMA CM will be used for this adjustment. This avoids an additional
> >>>> ib_modify_qp() call.
> >>> 
> >>> Can't userspace override the ibv_modify_qp() call the librdmacm wants
> >>> to make to do this?
> >> 
> >> Not sure I understand. The point is, that user-land which intends to
> >> set said timer, can do so without an additional ibv_modify_qp()
> >> call. May be I should have added:
> > 
> > IIRC in userspace the application has the option to call
> > ibv_modify_qp() so it can just change it before it makes the call?
> 
> User-space can call ibv_modify_qp, but that call is inherently
> expensive on some HCA implementations running virtualized.

you are not following.

In rdmacm userspace *always* calls the ibv_modify_qp

rdmacm has a 'helper' mode where it hides the call inside its logic in
librdmacm.

But, IIRC, a ULP can get some event and do the ibv_modify_qp in its
logic instead of using the hidden version inside rdma cm. Though that
may only be possible if you eliminate the entire librdmacm hidden logic

It just seems wrong to send data to the kernel just to have the kernel
copy that same data out to another system call and never use it at
all.

Actually I bet you could do this same thing entirely in userspace by
adjusting rdma_init_qp_attr() to copy the data that would be stored in
the cm_id.. ??

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 12:58       ` Haakon Bugge
  2021-03-31 13:15         ` Jason Gunthorpe
@ 2021-03-31 13:25         ` Haakon Bugge
  1 sibling, 0 replies; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 13:25 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 14:58, Haakon Bugge <haakon.bugge@oracle.com> wrote:
> 
> 
> 
>> On 31 Mar 2021, at 14:00, Jason Gunthorpe <jgg@nvidia.com> wrote:
>> 
>> On Wed, Mar 31, 2021 at 10:38:02AM +0000, Haakon Bugge wrote:
>>> 
>>> 
>>>> On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>> 
>>>> On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
>>>>> Introduce the ability for both user-space and kernel ULPs to adjust
>>>>> the minimum RNR Retry timer. The INIT -> RTR transition executed by
>>>>> RDMA CM will be used for this adjustment. This avoids an additional
>>>>> ib_modify_qp() call.
>>>> 
>>>> Can't userspace override the ibv_modify_qp() call the librdmacm wants
>>>> to make to do this?
>>> 
>>> Not sure I understand. The point is, that user-land which intends to
>>> set said timer, can do so without an additional ibv_modify_qp()
>>> call. May be I should have added:
>> 
>> IIRC in userspace the application has the option to call
>> ibv_modify_qp() so it can just change it before it makes the call?
> 
> User-space can call ibv_modify_qp, but that call is inherently expensive on some HCA implementations running virtualized. So this commit enables user-space to use rdma_set_option() to set information in the kernel's cm_id such that the required INIT -> RTR transition takes care of the RNR Retry timer value as well - with an additional modify_qp.

s/with/without/

-h

> 
> Thxs, Håkon
> 
>>> Shamelessly-inspired-by: 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack tos_set")
>> 
>> Hmm..
>> 
>> Jason
> 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 13:15         ` Jason Gunthorpe
@ 2021-03-31 13:34           ` Haakon Bugge
  2021-03-31 13:35             ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 13:34 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 15:15, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Mar 31, 2021 at 12:58:41PM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 31 Mar 2021, at 14:00, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> 
>>> On Wed, Mar 31, 2021 at 10:38:02AM +0000, Haakon Bugge wrote:
>>>> 
>>>> 
>>>>> On 31 Mar 2021, at 01:12, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>>> 
>>>>> On Thu, Mar 25, 2021 at 02:05:47PM +0100, Håkon Bugge wrote:
>>>>>> Introduce the ability for both user-space and kernel ULPs to adjust
>>>>>> the minimum RNR Retry timer. The INIT -> RTR transition executed by
>>>>>> RDMA CM will be used for this adjustment. This avoids an additional
>>>>>> ib_modify_qp() call.
>>>>> 
>>>>> Can't userspace override the ibv_modify_qp() call the librdmacm wants
>>>>> to make to do this?
>>>> 
>>>> Not sure I understand. The point is, that user-land which intends to
>>>> set said timer, can do so without an additional ibv_modify_qp()
>>>> call. May be I should have added:
>>> 
>>> IIRC in userspace the application has the option to call
>>> ibv_modify_qp() so it can just change it before it makes the call?
>> 
>> User-space can call ibv_modify_qp, but that call is inherently
>> expensive on some HCA implementations running virtualized.
> 
> you are not following.
> 
> In rdmacm userspace *always* calls the ibv_modify_qp
> 
> rdmacm has a 'helper' mode where it hides the call inside its logic in
> librdmacm.
> 
> But, IIRC, a ULP can get some event and do the ibv_modify_qp in its
> logic instead of using the hidden version inside rdma cm. Though that
> may only be possible if you eliminate the entire librdmacm hidden logic
> 
> It just seems wrong to send data to the kernel just to have the kernel
> copy that same data out to another system call and never use it at
> all.
> 
> Actually I bet you could do this same thing entirely in userspace by
> adjusting rdma_init_qp_attr() to copy the data that would be stored in
> the cm_id.. ??

This will definitely not solve the issue for kernel ULP, e.g., RDS. Further, why do we have rdma_set_option() with option RDMA_OPTION_ID_ACK_TIMEOUT ?

Let me dig into what you're saying.


Thxs, Håkon

> 
> Jason


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 13:34           ` Haakon Bugge
@ 2021-03-31 13:35             ` Jason Gunthorpe
  2021-03-31 14:49               ` Haakon Bugge
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 13:35 UTC (permalink / raw)
  To: Haakon Bugge; +Cc: Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:

> > Actually I bet you could do this same thing entirely in userspace by
> > adjusting rdma_init_qp_attr() to copy the data that would be stored in
> > the cm_id.. ??
> 
> This will definitely not solve the issue for kernel ULP, e.g., RDS. 

Sure, that makes sense to have some rdmacm api in-kernel only

> Further, why do we have rdma_set_option() with option RDMA_OPTION_ID_ACK_TIMEOUT ?

It may have been a mistake to do it like that

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 13:35             ` Jason Gunthorpe
@ 2021-03-31 14:49               ` Haakon Bugge
  2021-03-31 17:09                 ` Parav Pandit
  0 siblings, 1 reply; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 14:49 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
> 
>>> Actually I bet you could do this same thing entirely in userspace by
>>> adjusting rdma_init_qp_attr() to copy the data that would be stored in
>>> the cm_id.. ??
>> 
>> This will definitely not solve the issue for kernel ULP, e.g., RDS. 
> 
> Sure, that makes sense to have some rdmacm api in-kernel only

Let me send a v2 doing only that.

>> Further, why do we have rdma_set_option() with option RDMA_OPTION_ID_ACK_TIMEOUT ?
> 
> It may have been a mistake to do it like that

I see what you are saying. If id_priv in user space was augmented with timeout_set, timeout, rnr_retry_timer_set, and rnr_retry_time and corresponding rdma_set_{ack_timeout,rnr_retry_timer} functions, use said information in ucma_modify_qp_rtr() and ucma_modify_qp_rts(), it will be a user-space game only.


Håkon

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 14:49               ` Haakon Bugge
@ 2021-03-31 17:09                 ` Parav Pandit
  2021-03-31 17:35                   ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Parav Pandit @ 2021-03-31 17:09 UTC (permalink / raw)
  To: Haakon Bugge, Jason Gunthorpe; +Cc: Doug Ledford, OFED mailing list



> From: Haakon Bugge <haakon.bugge@oracle.com>
> Sent: Wednesday, March 31, 2021 8:20 PM
> 
> > On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
> >
> >>> Actually I bet you could do this same thing entirely in userspace by
> >>> adjusting rdma_init_qp_attr() to copy the data that would be stored
> >>> in the cm_id.. ??
> >>
> >> This will definitely not solve the issue for kernel ULP, e.g., RDS.
> >
> > Sure, that makes sense to have some rdmacm api in-kernel only
> 
> Let me send a v2 doing only that.
> 
> >> Further, why do we have rdma_set_option() with option
> RDMA_OPTION_ID_ACK_TIMEOUT ?
> >
> > It may have been a mistake to do it like that
> 
Timeout value goes in the CM request message so setting it through the cm_id object was likely correct.
This reflects into cm msg as well as in the QP of the cm_id.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:09                 ` Parav Pandit
@ 2021-03-31 17:35                   ` Jason Gunthorpe
  2021-03-31 17:38                     ` Haakon Bugge
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 17:35 UTC (permalink / raw)
  To: Parav Pandit; +Cc: Haakon Bugge, Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 05:09:27PM +0000, Parav Pandit wrote:
> 
> 
> > From: Haakon Bugge <haakon.bugge@oracle.com>
> > Sent: Wednesday, March 31, 2021 8:20 PM
> > 
> > > On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
> > >
> > >>> Actually I bet you could do this same thing entirely in userspace by
> > >>> adjusting rdma_init_qp_attr() to copy the data that would be stored
> > >>> in the cm_id.. ??
> > >>
> > >> This will definitely not solve the issue for kernel ULP, e.g., RDS.
> > >
> > > Sure, that makes sense to have some rdmacm api in-kernel only
> > 
> > Let me send a v2 doing only that.
> > 
> > >> Further, why do we have rdma_set_option() with option
> > RDMA_OPTION_ID_ACK_TIMEOUT ?
> > >
> > > It may have been a mistake to do it like that
> > 
> Timeout value goes in the CM request message so setting it through
> the cm_id object was likely correct.  This reflects into cm msg as
> well as in the QP of the cm_id.

Ah, yes if it goes in the wire in a CM message it has to go to the
kernel.

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:35                   ` Jason Gunthorpe
@ 2021-03-31 17:38                     ` Haakon Bugge
  2021-03-31 17:39                       ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 17:38 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Parav Pandit, Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 19:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Mar 31, 2021 at 05:09:27PM +0000, Parav Pandit wrote:
>> 
>> 
>>> From: Haakon Bugge <haakon.bugge@oracle.com>
>>> Sent: Wednesday, March 31, 2021 8:20 PM
>>> 
>>>> On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>> 
>>>> On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
>>>> 
>>>>>> Actually I bet you could do this same thing entirely in userspace by
>>>>>> adjusting rdma_init_qp_attr() to copy the data that would be stored
>>>>>> in the cm_id.. ??
>>>>> 
>>>>> This will definitely not solve the issue for kernel ULP, e.g., RDS.
>>>> 
>>>> Sure, that makes sense to have some rdmacm api in-kernel only
>>> 
>>> Let me send a v2 doing only that.
>>> 
>>>>> Further, why do we have rdma_set_option() with option
>>> RDMA_OPTION_ID_ACK_TIMEOUT ?
>>>> 
>>>> It may have been a mistake to do it like that
>>> 
>> Timeout value goes in the CM request message so setting it through
>> the cm_id object was likely correct.  This reflects into cm msg as
>> well as in the QP of the cm_id.
> 
> Ah, yes if it goes in the wire in a CM message it has to go to the
> kernel.

But does it go on the wire? No. The RNR Retry timer is not part of the negotiation with the peer.

Håkon

> 
> Jason


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:38                     ` Haakon Bugge
@ 2021-03-31 17:39                       ` Jason Gunthorpe
  2021-03-31 17:45                         ` Haakon Bugge
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 17:39 UTC (permalink / raw)
  To: Haakon Bugge; +Cc: Parav Pandit, Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 05:38:26PM +0000, Haakon Bugge wrote:
> 
> 
> > On 31 Mar 2021, at 19:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > 
> > On Wed, Mar 31, 2021 at 05:09:27PM +0000, Parav Pandit wrote:
> >> 
> >> 
> >>> From: Haakon Bugge <haakon.bugge@oracle.com>
> >>> Sent: Wednesday, March 31, 2021 8:20 PM
> >>> 
> >>>> On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> >>>> 
> >>>> On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
> >>>> 
> >>>>>> Actually I bet you could do this same thing entirely in userspace by
> >>>>>> adjusting rdma_init_qp_attr() to copy the data that would be stored
> >>>>>> in the cm_id.. ??
> >>>>> 
> >>>>> This will definitely not solve the issue for kernel ULP, e.g., RDS.
> >>>> 
> >>>> Sure, that makes sense to have some rdmacm api in-kernel only
> >>> 
> >>> Let me send a v2 doing only that.
> >>> 
> >>>>> Further, why do we have rdma_set_option() with option
> >>> RDMA_OPTION_ID_ACK_TIMEOUT ?
> >>>> 
> >>>> It may have been a mistake to do it like that
> >>> 
> >> Timeout value goes in the CM request message so setting it through
> >> the cm_id object was likely correct.  This reflects into cm msg as
> >> well as in the QP of the cm_id.
> > 
> > Ah, yes if it goes in the wire in a CM message it has to go to the
> > kernel.
> 
> But does it go on the wire? No. The RNR Retry timer is not part of
> the negotiation with the peer.

I think Parav was talking about the ID_ACK_TIMEOUT

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:39                       ` Jason Gunthorpe
@ 2021-03-31 17:45                         ` Haakon Bugge
  2021-03-31 17:50                           ` Jason Gunthorpe
  0 siblings, 1 reply; 17+ messages in thread
From: Haakon Bugge @ 2021-03-31 17:45 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Parav Pandit, Doug Ledford, OFED mailing list



> On 31 Mar 2021, at 19:39, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Mar 31, 2021 at 05:38:26PM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 31 Mar 2021, at 19:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> 
>>> On Wed, Mar 31, 2021 at 05:09:27PM +0000, Parav Pandit wrote:
>>>> 
>>>> 
>>>>> From: Haakon Bugge <haakon.bugge@oracle.com>
>>>>> Sent: Wednesday, March 31, 2021 8:20 PM
>>>>> 
>>>>>> On 31 Mar 2021, at 15:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>>>> 
>>>>>> On Wed, Mar 31, 2021 at 01:34:06PM +0000, Haakon Bugge wrote:
>>>>>> 
>>>>>>>> Actually I bet you could do this same thing entirely in userspace by
>>>>>>>> adjusting rdma_init_qp_attr() to copy the data that would be stored
>>>>>>>> in the cm_id.. ??
>>>>>>> 
>>>>>>> This will definitely not solve the issue for kernel ULP, e.g., RDS.
>>>>>> 
>>>>>> Sure, that makes sense to have some rdmacm api in-kernel only
>>>>> 
>>>>> Let me send a v2 doing only that.
>>>>> 
>>>>>>> Further, why do we have rdma_set_option() with option
>>>>> RDMA_OPTION_ID_ACK_TIMEOUT ?
>>>>>> 
>>>>>> It may have been a mistake to do it like that
>>>>> 
>>>> Timeout value goes in the CM request message so setting it through
>>>> the cm_id object was likely correct.  This reflects into cm msg as
>>>> well as in the QP of the cm_id.
>>> 
>>> Ah, yes if it goes in the wire in a CM message it has to go to the
>>> kernel.
>> 
>> But does it go on the wire? No. The RNR Retry timer is not part of
>> the negotiation with the peer.
> 
> I think Parav was talking about the ID_ACK_TIMEOUT

True. Local ACK Timeout is part of CM REQ. But that makes 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack tos_set") fuzzy, as it claims in the commentary: "The timeout will affect the local side of the QP, it is not negotiated with remote side ..."


Håkon

> 
> Jason


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:45                         ` Haakon Bugge
@ 2021-03-31 17:50                           ` Jason Gunthorpe
  2021-04-01  4:08                             ` Parav Pandit
  0 siblings, 1 reply; 17+ messages in thread
From: Jason Gunthorpe @ 2021-03-31 17:50 UTC (permalink / raw)
  To: Haakon Bugge; +Cc: Parav Pandit, Doug Ledford, OFED mailing list

On Wed, Mar 31, 2021 at 05:45:30PM +0000, Haakon Bugge wrote:

> True. Local ACK Timeout is part of CM REQ. But that makes
> 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack
> tos_set") fuzzy, as it claims in the commentary: "The timeout will
> affect the local side of the QP, it is not negotiated with remote
> side ..."

Hmm, is it intended to replace the CM provided local ack timeout on
the responder side?

It doesn't look like it copied into the MAD though, and the
qp_attr->timeout only looks like it is et for the IB_QPT_XRC_TGT case??

Very confusing indeed. Parav?

Jason

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer()
  2021-03-31 17:50                           ` Jason Gunthorpe
@ 2021-04-01  4:08                             ` Parav Pandit
  0 siblings, 0 replies; 17+ messages in thread
From: Parav Pandit @ 2021-04-01  4:08 UTC (permalink / raw)
  To: Jason Gunthorpe, Haakon Bugge; +Cc: Doug Ledford, OFED mailing list



> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, March 31, 2021 11:20 PM
> 
> On Wed, Mar 31, 2021 at 05:45:30PM +0000, Haakon Bugge wrote:
> 
> > True. Local ACK Timeout is part of CM REQ. But that makes
> > 2c1619edef61 ("IB/cma: Define option to set ack timeout and pack
> > tos_set") fuzzy, as it claims in the commentary: "The timeout will
> > affect the local side of the QP, it is not negotiated with remote side
> > ..."
> 
> Hmm, is it intended to replace the CM provided local ack timeout on the
> responder side?
> 
> It doesn't look like it copied into the MAD though, and the qp_attr->timeout
> only looks like it is et for the IB_QPT_XRC_TGT case??
>
> Very confusing indeed. Parav?
It is copied in core/cm.c at CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT in cm_format_req() with consideration of primary path packet_life_time.
Primary path packet lifetime is filled from cm_id.timeout -> path_rec.packet_life_time.

Comment says its not negotiated, because spec doesn't mandate to honor this value on passive side.
And connection doesn't fail if passive side doesn't like. So it's not negotiated.
But it indicates what ack timeout (and retries) to expect for a connection when one looks at the connection trace in wireshark.
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2021-04-01  4:09 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-25 13:05 [PATCH for-next] IB/cma: Introduce rdma_set_min_rnr_timer() Håkon Bugge
2021-03-30 23:12 ` Jason Gunthorpe
2021-03-31 10:38   ` Haakon Bugge
2021-03-31 12:00     ` Jason Gunthorpe
2021-03-31 12:58       ` Haakon Bugge
2021-03-31 13:15         ` Jason Gunthorpe
2021-03-31 13:34           ` Haakon Bugge
2021-03-31 13:35             ` Jason Gunthorpe
2021-03-31 14:49               ` Haakon Bugge
2021-03-31 17:09                 ` Parav Pandit
2021-03-31 17:35                   ` Jason Gunthorpe
2021-03-31 17:38                     ` Haakon Bugge
2021-03-31 17:39                       ` Jason Gunthorpe
2021-03-31 17:45                         ` Haakon Bugge
2021-03-31 17:50                           ` Jason Gunthorpe
2021-04-01  4:08                             ` Parav Pandit
2021-03-31 13:25         ` Haakon Bugge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.