* [PATCH v2] RDMA/rxe: Use acquire/release for memory ordering
From: Bob Pearson @ 2020-12-10 17:42 UTC (permalink / raw)
To: jgg, zyjzyj2000, linux-rdma; +Cc: Bob Pearson
Changed work and completion queues to use smp_load_acquire() and
smp_store_release() to synchronize between driver and users.
This commit goes with a matching series of commits in the rxe user
space provider.
v2: Addressed same issue for kernel ULPs which use rxe_post_send/recv().
Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
drivers/infiniband/sw/rxe/rxe_cq.c | 5 --
drivers/infiniband/sw/rxe/rxe_queue.h | 94 +++++++++++++++++----------
drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ----
include/uapi/rdma/rdma_user_rxe.h | 21 ++++++
4 files changed, 81 insertions(+), 50 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 43394c3f29d4..b315ebf041ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
- /* make sure all changes to the CQ are written before we update the
- * producer pointer
- */
- smp_wmb();
-
advance_producer(cq->queue);
spin_unlock_irqrestore(&cq->cq_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 7d434a6837a7..2902ca7b288c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,11 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
+/* for definition of shared struct rxe_queue_buf */
+#include <uapi/rdma/rdma_user_rxe.h>
+
/* implements a simple circular buffer that can optionally be
* shared between user space and the kernel and can be resized
-
* the requested element size is rounded up to a power of 2
* and the number of elements in the buffer is also rounded
* up to a power of 2. Since the queue is empty when the
@@ -17,28 +19,6 @@
* of the queue is one less than the number of element slots
*/
-/* this data structure is shared between user space and kernel
- * space for those cases where the queue is shared. It contains
- * the producer and consumer indices. Is also contains a copy
- * of the queue size parameters for user space to use but the
- * kernel must use the parameters in the rxe_queue struct
- * this MUST MATCH the corresponding librxe struct
- * for performance reasons arrange to have producer and consumer
- * pointers in separate cache lines
- * the kernel should always mask the indices to avoid accessing
- * memory outside of the data area
- */
-struct rxe_queue_buf {
- __u32 log2_elem_size;
- __u32 index_mask;
- __u32 pad_1[30];
- __u32 producer_index;
- __u32 pad_2[31];
- __u32 consumer_index;
- __u32 pad_3[31];
- __u8 data[];
-};
-
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
@@ -46,7 +26,7 @@ struct rxe_queue {
size_t buf_size;
size_t elem_size;
unsigned int log2_elem_size;
- unsigned int index_mask;
+ u32 index_mask;
};
int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
static inline int queue_empty(struct rxe_queue *q)
{
- return ((q->buf->producer_index - q->buf->consumer_index)
- & q->index_mask) == 0;
+ u32 prod;
+ u32 cons;
+
+ /* make sure all changes to queue complete before
+ * testing queue empty
+ */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ /* same */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+
+ return ((prod - cons) & q->index_mask) == 0;
}
static inline int queue_full(struct rxe_queue *q)
{
- return ((q->buf->producer_index + 1 - q->buf->consumer_index)
- & q->index_mask) == 0;
+ u32 prod;
+ u32 cons;
+
+ /* make sure all changes to queue complete before
+ * testing queue full
+ */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ /* same */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+
+ return ((prod + 1 - cons) & q->index_mask) == 0;
}
static inline void advance_producer(struct rxe_queue *q)
{
- q->buf->producer_index = (q->buf->producer_index + 1)
- & q->index_mask;
+ u32 prod;
+
+ prod = (q->buf->producer_index + 1) & q->index_mask;
+
+ /* make sure all changes to queue complete before
+ * changing producer index
+ */
+ smp_store_release(&q->buf->producer_index, prod);
}
static inline void advance_consumer(struct rxe_queue *q)
{
- q->buf->consumer_index = (q->buf->consumer_index + 1)
- & q->index_mask;
+ u32 cons;
+
+ cons = (q->buf->consumer_index + 1) & q->index_mask;
+
+ /* make sure all changes to queue complete before
+ * changing consumer index
+ */
+ smp_store_release(&q->buf->consumer_index, cons);
}
static inline void *producer_addr(struct rxe_queue *q)
@@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)
static inline unsigned int producer_index(struct rxe_queue *q)
{
- return q->buf->producer_index;
+ u32 index;
+
+ /* make sure all changes to queue
+ * complete before getting producer index
+ */
+ index = smp_load_acquire(&q->buf->producer_index);
+ index &= q->index_mask;
+
+ return index;
}
static inline unsigned int consumer_index(struct rxe_queue *q)
{
- return q->buf->consumer_index;
+ u32 index;
+
+ /* make sure all changes to queue
+ * complete before getting consumer index
+ */
+ index = smp_load_acquire(&q->buf->consumer_index);
+ index &= q->index_mask;
+
+ return index;
}
static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2fbea2b2d72a..a031514e2f41 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe->dma.cur_sge = 0;
recv_wqe->dma.sge_offset = 0;
- /* make sure all changes to the work queue are written before we
- * update the producer pointer
- */
- smp_wmb();
-
advance_producer(rq->queue);
return 0;
@@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
if (unlikely(err))
goto err1;
- /*
- * make sure all changes to the work queue are
- * written before we update the producer pointer
- */
- smp_wmb();
-
advance_producer(sq->queue);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e591d8c1f3cf..068433e2229d 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
__aligned_u64 mmap_info_addr;
};
+/* This data structure is stored at the base of work and
+ * completion queues shared between user space and kernel space.
+ * It contains the producer and consumer indices.  It also
+ * contains a copy of the queue size parameters for user space
+ * to use, but the kernel must use the parameters in the
+ * rxe_queue struct.  For performance reasons arrange to have
+ * the producer and consumer indices in separate cache lines.
+ * The kernel should always mask the indices to avoid accessing
+ * memory outside of the data area.
+ */
+struct rxe_queue_buf {
+ __u32 log2_elem_size;
+ __u32 index_mask;
+ __u32 pad_1[30];
+ __u32 producer_index;
+ __u32 pad_2[31];
+ __u32 consumer_index;
+ __u32 pad_3[31];
+ __u8 data[];
+};
+
#endif /* RDMA_USER_RXE_H */
--
2.27.0
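For readers outside the rxe tree, a minimal sketch of how the converted helpers are meant to pair up; example_post() and example_poll() are hypothetical names that assume the rxe_queue helpers from the hunk above, not functions in the driver:

/* Producer side: write the element first, then publish it by advancing
 * the producer index.  advance_producer() uses smp_store_release(), so
 * the element contents are guaranteed to be visible before the new
 * index value.
 */
static void example_post(struct rxe_queue *q, const void *elem)
{
	memcpy(producer_addr(q), elem, q->elem_size);
	advance_producer(q);
}

/* Consumer side: queue_empty() uses smp_load_acquire() on the producer
 * index, so once it reports non-empty the element read below cannot be
 * reordered before that index load.
 */
static int example_poll(struct rxe_queue *q, void *elem)
{
	if (queue_empty(q))
		return -EAGAIN;

	memcpy(elem, consumer_addr(q), q->elem_size);
	advance_consumer(q);
	return 0;
}

This is why the open-coded smp_wmb() calls in rxe_cq_post(), post_one_recv() and post_one_send() can be dropped: the release inside advance_producer() now provides the ordering.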
* Re: [PATCH v2] RDMA/rxe: Use acquire/release for memory ordering
From: Jason Gunthorpe @ 2020-12-10 23:14 UTC (permalink / raw)
To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma, Bob Pearson
On Thu, Dec 10, 2020 at 11:42:59AM -0600, Bob Pearson wrote:
> @@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
>
> static inline int queue_empty(struct rxe_queue *q)
> {
> - return ((q->buf->producer_index - q->buf->consumer_index)
> - & q->index_mask) == 0;
> + u32 prod;
> + u32 cons;
> +
> + /* make sure all changes to queue complete before
> + * testing queue empty
> + */
> + prod = smp_load_acquire(&q->buf->producer_index);
> + /* same */
> + cons = smp_load_acquire(&q->buf->consumer_index);
This is not so sensible. The one written by the kernel should be just
a normal load, and there must be some lock held here to protect that.
The one written by user space should be just READ_ONCE().
Acquire only has meaning if you go on to read additional data based on
the result of the acquire - then acquire ensures the additional reads
can't cross writes that occurred before the matching release.
Jason
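A sketch of what that split might look like for the completion queue, where the kernel is the producer and user space the consumer; example_cq_full() is a hypothetical illustration of the review comment, not code from this thread:

/* The producer index is only written by the kernel under cq->cq_lock,
 * so a plain load is enough while that lock is held.  The consumer
 * index is written by user space, so READ_ONCE() guards against torn
 * or repeated loads, but no acquire is needed because the kernel does
 * not read any data published by that index update.
 */
static int example_cq_full(struct rxe_cq *cq)
{
	u32 prod = cq->queue->buf->producer_index;
	u32 cons = READ_ONCE(cq->queue->buf->consumer_index);

	return ((prod + 1 - cons) & cq->queue->index_mask) == 0;
}

The same reasoning applies in reverse on the work queues, where the producer index is written by user space or by a kernel ULP under the sq/rq lock.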
* Re: [PATCH v2] RDMA/rxe: Use acquire/release for memory ordering
From: Jason Gunthorpe @ 2020-12-11 23:59 UTC (permalink / raw)
To: Bob Pearson; +Cc: zyjzyj2000, linux-rdma, Bob Pearson
On Thu, Dec 10, 2020 at 11:42:59AM -0600, Bob Pearson wrote:
> Changed work and completion queues to use smp_load_acquire() and
> smp_store_release() to synchronize between driver and users.
> This commit goes with a matching series of commits in the rxe user
> space provider.
>
> v2: Addressed same issue for kernel ULPs which use rxe_post_send/recv().
>
> Signed-off-by: Bob Pearson <rpearson@hpe.com>
> ---
> drivers/infiniband/sw/rxe/rxe_cq.c | 5 --
> drivers/infiniband/sw/rxe/rxe_queue.h | 94 +++++++++++++++++----------
> drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ----
> include/uapi/rdma/rdma_user_rxe.h | 21 ++++++
> 4 files changed, 81 insertions(+), 50 deletions(-)
This really is a lot better than what was here; the extra barriers on
empty/full can be fine-tuned later.
So applied to for-next.
Thanks,
Jason