All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC rdma-core] verbs: relaxed ordering memory regions
@ 2019-11-17  7:41 Haggai Eran
  2019-11-19 19:29 ` Jason Gunthorpe
  0 siblings, 1 reply; 3+ messages in thread
From: Haggai Eran @ 2019-11-17  7:41 UTC (permalink / raw)
  To: linux-rdma, Jason Gunthorpe; +Cc: Yishai Hadas, michaelgur, Haggai Eran

Add flag to allow the creation of relaxed ordering memory regions.
Access through such MRs can improve performance by allowing the system
to reorder certain memory accesses.

As relaxed ordering is an optimization, drivers that do not support it
can simply ignore it, and it is recommended that users enable it
whenever completions are used to synchronize with the HCA.

We replace the symbols ibv_reg_mr/ibv_reg_mr_iova with a new symbol
(ibv_reg_mr_iova2). The new function would check against the kernel
whether relaxed ordering is supported or not, and disable it if
necessary. Running against an older library will fail with a linker
error instead of an obscure EINVAL from the kernel.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
 libibverbs/libibverbs.map.in |  5 +++++
 libibverbs/man/ibv_reg_mr.3  | 15 +++++++++++++++
 libibverbs/verbs.c           | 15 +++++++++++++++
 libibverbs/verbs.h           | 34 +++++++++++++++++++++++++++++++++-
 providers/mlx5/verbs.c       |  3 ++-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
index c1b4537a5d1f..5280cfe6cc44 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -121,6 +121,11 @@ IBVERBS_1.7 {
 		ibv_reg_mr_iova;
 } IBVERBS_1.6;
 
+IBVERBS_1.8 {
+	global:
+		ibv_reg_mr_iova2;
+} IBVERBS_1.7;
+
 /* If any symbols in this stanza change ABI then the entire staza gets a new symbol
    version. See the top level CMakeLists.txt for this setting. */
 
diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3
index be90a57bb989..163fca20d85f 100644
--- a/libibverbs/man/ibv_reg_mr.3
+++ b/libibverbs/man/ibv_reg_mr.3
@@ -43,6 +43,21 @@ describes the desired memory protection attributes; it is either 0 or the bitwis
 .B IBV_ACCESS_ZERO_BASED\fR    Use byte offset from beginning of MR to access this MR, instead of a pointer address
 .TP
 .B IBV_ACCESS_ON_DEMAND\fR    Create an on-demand paging MR
+.TP
+.B IBV_ACCESS_RELAXED_ORDERING\fR    Create an MR that uses relaxed ordering.
+
+Such MRs may reorder the scattering of Send/Write/Atomic operations to memory,
+so the target CPU and other RDMA operations may observe them in a
+different order than requested at the initiator. In addition, memory
+reads from such MR do not flush PCI-Express writes from peer devices.
+
+It is only safe to read a data buffer that was written to a relaxed MR after
+observing the approperiate completion, or after observing a subsequent atomic
+operation on a strongly ordered MR.
+
+Relaxed MRs can improve performance on some systems. On systems/drivers without
+adequate support this flag is ignored.
+
 .PP
 If
 .B IBV_ACCESS_REMOTE_WRITE
diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c
index e5063af26dd4..e0f44129376e 100644
--- a/libibverbs/verbs.c
+++ b/libibverbs/verbs.c
@@ -296,6 +296,7 @@ LATEST_SYMVER_FUNC(ibv_dealloc_pd, 1_1, "IBVERBS_1.1",
 	return get_ops(pd->context)->dealloc_pd(pd);
 }
 
+#undef ibv_reg_mr
 LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
 		   struct ibv_mr *,
 		   struct ibv_pd *pd, void *addr,
@@ -319,6 +320,8 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
 	return mr;
 }
 
+
+#undef ibv_reg_mr_iova
 struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
 			       uint64_t iova, int access)
 {
@@ -339,6 +342,18 @@ struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
 	return mr;
 }
 
+LATEST_SYMVER_FUNC(ibv_reg_mr_iova2, 1_8, "IBVERBS_1.8",
+		   struct ibv_mr *,
+		   struct ibv_pd *pd, void *addr, size_t length,
+		   uint64_t iova, int access)
+{
+	/* TODO check whether kernel supports optional
+	 * IBV_ACCESS_RELAXED_ORDERING and clear it if it is not supported
+	 */
+
+	return ibv_reg_mr_iova(pd, addr, length, iova, access);
+}
+
 LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1",
 		   int,
 		   struct ibv_mr *mr, int flags,
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 7b8d431024b9..597167c4c963 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -580,6 +580,7 @@ enum ibv_access_flags {
 	IBV_ACCESS_MW_BIND		= (1<<4),
 	IBV_ACCESS_ZERO_BASED		= (1<<5),
 	IBV_ACCESS_ON_DEMAND		= (1<<6),
+	IBV_ACCESS_RELAXED_ORDERING	= (1<<8),
 };
 
 struct ibv_mw_bind_info {
@@ -2387,6 +2388,18 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
  */
 struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
 			  size_t length, int access);
+/* Use the new version of ibv_reg_mr only if flags that require it are used. */
+#define ibv_reg_mr(pd, addr, length, access) ({\
+	const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING; \
+	struct ibv_mr *__mr; \
+	\
+	if (__builtin_constant_p(access) && \
+	    !((access) & optional_access_flags)) \
+		__mr = ibv_reg_mr(pd, addr, length, access); \
+	else \
+		__mr = ibv_reg_mr_iova2(pd, addr, length, (uintptr_t)addr, \
+					access); \
+	__mr; })
 
 /**
  * ibv_reg_mr_iova - Register a memory region with a virtual offset
@@ -2394,7 +2407,26 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
  */
 struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
 			       uint64_t iova, int access);
-
+/* Use the new version of ibv_reg_mr only if flags that require it are used. */
+#define ibv_reg_mr_iova(pd, addr, length, iova, access) ({\
+	const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING; \
+	struct ibv_mr *__mr; \
+	\
+	if (__builtin_constant_p(access) && \
+	    !((access) & optional_access_flags)) \
+		__mr = ibv_reg_mr_iova(pd, addr, length, iova, access); \
+	else \
+		__mr = ibv_reg_mr_iova2(pd, addr, length, iova, access); \
+	__mr; })
+
+/**
+ * ibv_reg_mr_iova2 - Register a memory region with a virtual offset
+ *
+ * This version of ibv_reg_mr_iova can ignore optional access flags such as
+ * IBV_ACCESS_RELAXED_ORDERING if they are not supported.
+ */
+struct ibv_mr *ibv_reg_mr_iova2(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t iova, int access);
 
 enum ibv_rereg_mr_err_code {
 	/* Old MR is valid, invalid input */
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 48bfd682eb5f..cabb2eca815c 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -454,7 +454,8 @@ enum {
 				 IBV_ACCESS_REMOTE_WRITE	|
 				 IBV_ACCESS_REMOTE_READ		|
 				 IBV_ACCESS_REMOTE_ATOMIC	|
-				 IBV_ACCESS_ZERO_BASED
+				 IBV_ACCESS_ZERO_BASED		|
+				 IBV_ACCESS_RELAXED_ORDERING
 };
 
 struct ibv_mr *mlx5_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC rdma-core] verbs: relaxed ordering memory regions
  2019-11-17  7:41 [PATCH RFC rdma-core] verbs: relaxed ordering memory regions Haggai Eran
@ 2019-11-19 19:29 ` Jason Gunthorpe
  2019-11-21 13:38   ` Haggai Eran
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Gunthorpe @ 2019-11-19 19:29 UTC (permalink / raw)
  To: Haggai Eran; +Cc: linux-rdma, Yishai Hadas, michaelgur

On Sun, Nov 17, 2019 at 09:41:28AM +0200, Haggai Eran wrote:
> +/* Use the new version of ibv_reg_mr only if flags that require it are used. */
> +#define ibv_reg_mr(pd, addr, length, access) ({\
> +	const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING;
> \

We need to set aside more bits for optional as we can't keep revising
this

> +	struct ibv_mr *__mr; \
> +	\
> +	if (__builtin_constant_p(access) && \
> +	    !((access) & optional_access_flags)) \
> +		__mr = ibv_reg_mr(pd, addr, length, access); \
> +	else \
> +		__mr = ibv_reg_mr_iova2(pd, addr, length, (uintptr_t)addr, \
> +					access); \

Missing brackets around addr

This also wants to be a ?: expression to avoid the ({}) extension

Jason

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH RFC rdma-core] verbs: relaxed ordering memory regions
  2019-11-19 19:29 ` Jason Gunthorpe
@ 2019-11-21 13:38   ` Haggai Eran
  0 siblings, 0 replies; 3+ messages in thread
From: Haggai Eran @ 2019-11-21 13:38 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma, Yishai Hadas, Michael Guralnik

On 11/19/2019 9:29 PM, Jason Gunthorpe wrote:
> On Sun, Nov 17, 2019 at 09:41:28AM +0200, Haggai Eran wrote:
>> +/* Use the new version of ibv_reg_mr only if flags that require it are used. */
>> +#define ibv_reg_mr(pd, addr, length, access) ({\
>> +	const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING;
>> \
> 
> We need to set aside more bits for optional as we can't keep revising
> this

Sure. Is 10 bits okay?

#define IBV_ACCESS_OPTIONAL ((1 << 10) - 1) << 8

> 
>> +	struct ibv_mr *__mr; \
>> +	\
>> +	if (__builtin_constant_p(access) && \
>> +	    !((access) & optional_access_flags)) \
>> +		__mr = ibv_reg_mr(pd, addr, length, access); \
>> +	else \
>> +		__mr = ibv_reg_mr_iova2(pd, addr, length, (uintptr_t)addr, \
>> +					access); \
> 
> Missing brackets around addr
> 
> This also wants to be a ?: expression to avoid the ({}) extension
Sure, I'll do that.

Thanks,
Haggai

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-11-21 13:38 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-17  7:41 [PATCH RFC rdma-core] verbs: relaxed ordering memory regions Haggai Eran
2019-11-19 19:29 ` Jason Gunthorpe
2019-11-21 13:38   ` Haggai Eran

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.