All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH rdma-core 0/4] Support compiling on ARM32 and others
@ 2017-01-10 19:02 Jason Gunthorpe
       [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2017-01-10 19:02 UTC (permalink / raw)
  To: Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA

This is PR

https://github.com/linux-rdma/rdma-core/

The basic idea is to limit the use of arch.h to only places working with DMA,
detect when the arch does not support coherent DMA, and then disable compiling
those providers. Non-DMA providers are ported to use C11 atomics and
can compile on all arches.

- Only use infiniband/arch.h in providers that require coherent DMA
- Convert rxe/hfi1/ipath to use the C11 memory model to create portable barriers for this non-DMA use
- Update cmake to enable C11
- Update cmake to support stdatomic.h

Jason Gunthorpe (4):
  Minimize the places where infiniband/arch.h is included
  Detect if infiniband/arch.h is supported by the compiler
  Provide cmake support to enable C11 stdatomic.h
  Use C11 atomics instead of wmb/rmb macros for CPU-only atomics

 CMakeLists.txt                     |  23 ++-
 buildlib/RDMA_EnableCStd.cmake     |  11 +-
 buildlib/fixup-include/stdatomic.h | 369 +++++++++++++++++++++++++++++++++++++
 ibacm/linux/osd.h                  |   1 -
 ibacm/src/libacm.c                 |   6 +-
 libibumad/sysfs.c                  |   3 +-
 libibverbs/device.c                |   4 +-
 libibverbs/examples/device_list.c  |   3 +-
 libibverbs/examples/devinfo.c      |   3 +-
 librdmacm/addrinfo.c               |   6 +-
 librdmacm/cma.c                    |   6 +-
 librdmacm/cma.h                    |   1 -
 librdmacm/examples/rping.c         |   7 +-
 librdmacm/rsocket.c                |   8 +-
 providers/hfi1verbs/hfiverbs.h     |  10 +-
 providers/hfi1verbs/verbs.c        |  17 +-
 providers/ipathverbs/ipathverbs.h  |  10 +-
 providers/ipathverbs/verbs.c       |  18 +-
 providers/rxe/rxe.c                |   5 +-
 providers/rxe/rxe_queue.h          |  72 ++++----
 srp_daemon/srp_daemon.h            |   1 -
 21 files changed, 484 insertions(+), 100 deletions(-)
 create mode 100644 buildlib/fixup-include/stdatomic.h

-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH rdma-core 1/4] Minimize the places where infiniband/arch.h is included
       [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-01-10 19:02   ` Jason Gunthorpe
       [not found]     ` <1484074931-3847-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2017-01-10 19:02   ` [PATCH rdma-core 2/4] Detect if infiniband/arch.h is supported by the compiler Jason Gunthorpe
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2017-01-10 19:02 UTC (permalink / raw)
  To: Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Hal Rosenstock, Doug Ledford, Yishai Hadas, Sean Hefty, Bart Van Assche

We never want to use it for htonll as this header can only be
compiled on architectures that define the PCI memory barriers.
Instead use byteswap.h directly.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 ibacm/linux/osd.h                 | 1 -
 ibacm/src/libacm.c                | 6 +++---
 libibumad/sysfs.c                 | 3 +--
 libibverbs/device.c               | 4 +---
 libibverbs/examples/device_list.c | 3 +--
 libibverbs/examples/devinfo.c     | 3 +--
 librdmacm/addrinfo.c              | 6 +++---
 librdmacm/cma.c                   | 6 +++---
 librdmacm/cma.h                   | 1 -
 librdmacm/examples/rping.c        | 7 +++----
 librdmacm/rsocket.c               | 8 ++++----
 srp_daemon/srp_daemon.h           | 1 -
 12 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/ibacm/linux/osd.h b/ibacm/linux/osd.h
index 6e408f75ac1937..4169ec54f0f38e 100644
--- a/ibacm/linux/osd.h
+++ b/ibacm/linux/osd.h
@@ -46,7 +46,6 @@
 #include <arpa/inet.h>
 #include <sys/time.h>
 #include <netinet/in.h>
-#include <infiniband/arch.h>
 
 #include <ccan/minmax.h>
 
diff --git a/ibacm/src/libacm.c b/ibacm/src/libacm.c
index c121ad7b114e4b..8288bd446fdcf9 100644
--- a/ibacm/src/libacm.c
+++ b/ibacm/src/libacm.c
@@ -338,7 +338,7 @@ int ib_acm_query_perf(int index, uint64_t **counters, int *count)
 
 	*count = msg.hdr.data[0];
 	for (i = 0; i < *count; i++)
-		(*counters)[i] = ntohll(msg.perf_data[i]);
+		(*counters)[i] = be64toh(msg.perf_data[i]);
 	ret = 0;
 out:
 	pthread_mutex_unlock(&acm_lock);
@@ -385,7 +385,7 @@ int ib_acm_enum_ep(int index, struct acm_ep_config_data **data)
 	}
 
 	memcpy(edata, &msg.ep_data[0], len);
-	edata->dev_guid = ntohll(msg.ep_data[0].dev_guid);
+	edata->dev_guid = be64toh(msg.ep_data[0].dev_guid);
 	edata->pkey = ntohs(msg.ep_data[0].pkey);
 	edata->addr_cnt = cnt;
 	*data = edata;
@@ -440,7 +440,7 @@ int ib_acm_query_perf_ep_addr(uint8_t *src, uint8_t type,
 
 	*count = msg.hdr.data[0];
 	for (i = 0; i < *count; i++)
-		(*counters)[i] = ntohll(msg.perf_data[i]);
+		(*counters)[i] = be64toh(msg.perf_data[i]);
 
 	ret = 0;
 out:
diff --git a/libibumad/sysfs.c b/libibumad/sysfs.c
index 730374d0dc4ca6..9ef16fd9fa09f7 100644
--- a/libibumad/sysfs.c
+++ b/libibumad/sysfs.c
@@ -43,7 +43,6 @@
 #include <fcntl.h>
 #include <byteswap.h>
 #include <netinet/in.h>
-#include <infiniband/arch.h>
 #include "sysfs.h"
 
 static int ret_code(void)
@@ -98,7 +97,7 @@ int sys_read_guid(const char *dir_name, const char *file_name, uint64_t * net_gu
 		guid = (guid << 16) | (strtoul(str, NULL, 16) & 0xffff);
 	}
 
-	*net_guid = htonll(guid);
+	*net_guid = htobe64(guid);
 
 	return 0;
 }
diff --git a/libibverbs/device.c b/libibverbs/device.c
index 8950f2a3d711cd..c7f39d57681170 100644
--- a/libibverbs/device.c
+++ b/libibverbs/device.c
@@ -43,8 +43,6 @@
 #include <alloca.h>
 #include <errno.h>
 
-#include <infiniband/arch.h>
-
 #include "ibverbs.h"
 
 #pragma GCC diagnostic ignored "-Wmissing-prototypes"
@@ -118,7 +116,7 @@ uint64_t __ibv_get_device_guid(struct ibv_device *device)
 	for (i = 0; i < 4; ++i)
 		guid = (guid << 16) | parts[i];
 
-	return htonll(guid);
+	return htobe64(guid);
 }
 default_symver(__ibv_get_device_guid, ibv_get_device_guid);
 
diff --git a/libibverbs/examples/device_list.c b/libibverbs/examples/device_list.c
index 13f40ad630070d..084ede597af026 100644
--- a/libibverbs/examples/device_list.c
+++ b/libibverbs/examples/device_list.c
@@ -38,7 +38,6 @@
 #include <byteswap.h>
 
 #include <infiniband/verbs.h>
-#include <infiniband/arch.h>
 
 int main(int argc, char *argv[])
 {
@@ -57,7 +56,7 @@ int main(int argc, char *argv[])
 	for (i = 0; i < num_devices; ++i) {
 		printf("    %-16s\t%016llx\n",
 		       ibv_get_device_name(dev_list[i]),
-		       (unsigned long long) ntohll(ibv_get_device_guid(dev_list[i])));
+		       (unsigned long long) be64toh(ibv_get_device_guid(dev_list[i])));
 	}
 
 	ibv_free_device_list(dev_list);
diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c
index 78e92b95c3cae8..2303ce3936c8e1 100644
--- a/libibverbs/examples/devinfo.c
+++ b/libibverbs/examples/devinfo.c
@@ -45,7 +45,6 @@
 
 #include <infiniband/verbs.h>
 #include <infiniband/driver.h>
-#include <infiniband/arch.h>
 
 static int verbose;
 
@@ -57,7 +56,7 @@ static int null_gid(union ibv_gid *gid)
 
 static const char *guid_str(uint64_t node_guid, char *str)
 {
-	node_guid = ntohll(node_guid);
+	node_guid = be64toh(node_guid);
 	sprintf(str, "%04x:%04x:%04x:%04x",
 		(unsigned) (node_guid >> 48) & 0xffff,
 		(unsigned) (node_guid >> 32) & 0xffff,
diff --git a/librdmacm/addrinfo.c b/librdmacm/addrinfo.c
index ac346438a6d07c..c7c19b1d019f65 100644
--- a/librdmacm/addrinfo.c
+++ b/librdmacm/addrinfo.c
@@ -113,12 +113,12 @@ void ucma_set_sid(enum rdma_port_space ps, struct sockaddr *addr,
 	uint16_t port;
 
 	port = addr ? ucma_get_port(addr) : 0;
-	sib->sib_sid = htonll(((uint64_t) ps << 16) + ntohs(port));
+	sib->sib_sid = htobe64(((uint64_t) ps << 16) + ntohs(port));
 
 	if (ps)
-		sib->sib_sid_mask = htonll(RDMA_IB_IP_PS_MASK);
+		sib->sib_sid_mask = htobe64(RDMA_IB_IP_PS_MASK);
 	if (port)
-		sib->sib_sid_mask |= htonll(RDMA_IB_IP_PORT_MASK);
+		sib->sib_sid_mask |= htobe64(RDMA_IB_IP_PORT_MASK);
 }
 
 static int ucma_convert_in6(int ps, struct sockaddr_ib **dst, socklen_t *dst_len,
diff --git a/librdmacm/cma.c b/librdmacm/cma.c
index 7f79ee942186bb..0c742845dde8d6 100644
--- a/librdmacm/cma.c
+++ b/librdmacm/cma.c
@@ -177,8 +177,8 @@ static void ucma_set_af_ib_support(void)
 
 	memset(&sib, 0, sizeof sib);
 	sib.sib_family = AF_IB;
-	sib.sib_sid = htonll(RDMA_IB_IP_PS_TCP);
-	sib.sib_sid_mask = htonll(RDMA_IB_IP_PS_MASK);
+	sib.sib_sid = htobe64(RDMA_IB_IP_PS_TCP);
+	sib.sib_sid_mask = htobe64(RDMA_IB_IP_PS_MASK);
 	af_ib_support = 1;
 	ret = rdma_bind_addr(id, (struct sockaddr *) &sib);
 	af_ib_support = !ret;
@@ -2443,7 +2443,7 @@ uint16_t ucma_get_port(struct sockaddr *addr)
 	case AF_INET6:
 		return ((struct sockaddr_in6 *) addr)->sin6_port;
 	case AF_IB:
-		return htons((uint16_t) ntohll(((struct sockaddr_ib *) addr)->sib_sid));
+		return htons((uint16_t) be64toh(((struct sockaddr_ib *) addr)->sib_sid));
 	default:
 		return 0;
 	}
diff --git a/librdmacm/cma.h b/librdmacm/cma.h
index 6fe786052dfff5..2e7c4182e7d98e 100644
--- a/librdmacm/cma.h
+++ b/librdmacm/cma.h
@@ -44,7 +44,6 @@
 
 #include <rdma/rdma_cma.h>
 #include <infiniband/ib.h>
-#include <infiniband/arch.h>
 
 #include <ccan/minmax.h>
 
diff --git a/librdmacm/examples/rping.c b/librdmacm/examples/rping.c
index 53c152538c67ff..c56b6ef3010f53 100644
--- a/librdmacm/examples/rping.c
+++ b/librdmacm/examples/rping.c
@@ -47,7 +47,6 @@
 #include <inttypes.h>
 
 #include <rdma/rdma_cma.h>
-#include <infiniband/arch.h>
 
 static int debug = 0;
 #define DEBUG_LOG if (debug) printf
@@ -246,7 +245,7 @@ static int server_recv(struct rping_cb *cb, struct ibv_wc *wc)
 	}
 
 	cb->remote_rkey = ntohl(cb->recv_buf.rkey);
-	cb->remote_addr = ntohll(cb->recv_buf.buf);
+	cb->remote_addr = be64toh(cb->recv_buf.buf);
 	cb->remote_len  = ntohl(cb->recv_buf.size);
 	DEBUG_LOG("Received rkey %x addr %" PRIx64 " len %d from peer\n",
 		  cb->remote_rkey, cb->remote_addr, cb->remote_len);
@@ -622,12 +621,12 @@ static void rping_format_send(struct rping_cb *cb, char *buf, struct ibv_mr *mr)
 {
 	struct rping_rdma_info *info = &cb->send_buf;
 
-	info->buf = htonll((uint64_t) (unsigned long) buf);
+	info->buf = htobe64((uint64_t) (unsigned long) buf);
 	info->rkey = htonl(mr->rkey);
 	info->size = htonl(cb->size);
 
 	DEBUG_LOG("RDMA addr %" PRIx64" rkey %x len %d\n",
-		  ntohll(info->buf), ntohl(info->rkey), ntohl(info->size));
+		  be64toh(info->buf), ntohl(info->rkey), ntohl(info->size));
 }
 
 static int rping_test_server(struct rping_cb *cb)
diff --git a/librdmacm/rsocket.c b/librdmacm/rsocket.c
index e0755dd0ab056a..e2e29a30eb59ed 100644
--- a/librdmacm/rsocket.c
+++ b/librdmacm/rsocket.c
@@ -1045,18 +1045,18 @@ static void rs_format_conn_data(struct rsocket *rs, struct rs_conn_data *conn)
 	memset(conn->reserved, 0, sizeof conn->reserved);
 	conn->target_iomap_size = (uint8_t) rs_value_to_scale(rs->target_iomap_size, 8);
 
-	conn->target_sgl.addr = htonll((uintptr_t) rs->target_sgl);
+	conn->target_sgl.addr = htobe64((uintptr_t) rs->target_sgl);
 	conn->target_sgl.length = htonl(RS_SGL_SIZE);
 	conn->target_sgl.key = htonl(rs->target_mr->rkey);
 
-	conn->data_buf.addr = htonll((uintptr_t) rs->rbuf);
+	conn->data_buf.addr = htobe64((uintptr_t) rs->rbuf);
 	conn->data_buf.length = htonl(rs->rbuf_size >> 1);
 	conn->data_buf.key = htonl(rs->rmr->rkey);
 }
 
 static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn)
 {
-	rs->remote_sgl.addr = ntohll(conn->target_sgl.addr);
+	rs->remote_sgl.addr = be64toh(conn->target_sgl.addr);
 	rs->remote_sgl.length = ntohl(conn->target_sgl.length);
 	rs->remote_sgl.key = ntohl(conn->target_sgl.key);
 	rs->remote_sge = 1;
@@ -1071,7 +1071,7 @@ static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn)
 		rs->remote_iomap.key = rs->remote_sgl.key;
 	}
 
-	rs->target_sgl[0].addr = ntohll(conn->data_buf.addr);
+	rs->target_sgl[0].addr = be64toh(conn->data_buf.addr);
 	rs->target_sgl[0].length = ntohl(conn->data_buf.length);
 	rs->target_sgl[0].key = ntohl(conn->data_buf.key);
 
diff --git a/srp_daemon/srp_daemon.h b/srp_daemon/srp_daemon.h
index d9d92f10f84c25..3de8ee0e9827cb 100644
--- a/srp_daemon/srp_daemon.h
+++ b/srp_daemon/srp_daemon.h
@@ -42,7 +42,6 @@
 #include <byteswap.h>
 #include <infiniband/verbs.h>
 #include <infiniband/umad.h>
-#include <infiniband/arch.h>
 
 #include "config.h"
 #include "srp_ib_types.h"
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH rdma-core 2/4] Detect if infiniband/arch.h is supported by the compiler
       [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2017-01-10 19:02   ` [PATCH rdma-core 1/4] Minimize the places where infiniband/arch.h is included Jason Gunthorpe
@ 2017-01-10 19:02   ` Jason Gunthorpe
  2017-01-10 19:02   ` [PATCH rdma-core 3/4] Provide cmake support to enable C11 stdatomic.h Jason Gunthorpe
  2017-01-10 19:02   ` [PATCH rdma-core 4/4] Use C11 atomics instead of wmb/rmb macros for CPU-only atomics Jason Gunthorpe
  3 siblings, 0 replies; 6+ messages in thread
From: Jason Gunthorpe @ 2017-01-10 19:02 UTC (permalink / raw)
  To: Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA

If not then disable compiling providers that require coherent DMA.
Currently this is all of them.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 CMakeLists.txt | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d546f22790d98..a8cc76d4a9f3c8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -278,6 +278,14 @@ check_type_size("long" SIZEOF_LONG BUILTIN_TYPES_ONLY LANGUAGE C)
 
 include(RDMA_LinuxHeaders)
 
+# Determine if this arch supports cache coherent DMA. This isn't really an
+# arch specific property, but for our purposes arches that do not support it
+# also do not define wmb/etc which breaks our compile.
+CHECK_C_SOURCE_COMPILES("
+#include \"${CMAKE_CURRENT_SOURCE_DIR}/libibverbs/arch.h\"
+ int main(int argc,const char *argv[]) {return 0;}"
+  HAVE_COHERENT_DMA)
+
 #-------------------------
 # Apply fixups
 
@@ -358,6 +366,7 @@ add_subdirectory(librdmacm/man)
 add_subdirectory(libibcm)
 
 # Providers
+if (HAVE_COHERENT_DMA)
 add_subdirectory(providers/cxgb3)
 add_subdirectory(providers/cxgb4)
 add_subdirectory(providers/hfi1verbs)
@@ -372,6 +381,7 @@ add_subdirectory(providers/ocrdma)
 add_subdirectory(providers/qedr)
 add_subdirectory(providers/rxe)
 add_subdirectory(providers/rxe/man)
+endif()
 
 # Binaries
 add_subdirectory(ibacm)
@@ -396,6 +406,9 @@ message(STATUS "Missing Optional Items:")
 if (NOT HAVE_FUNC_ATTRIBUTE_ALWAYS_INLINE)
   message(STATUS " Compiler attribute always_inline NOT supported")
 endif()
+if (NOT HAVE_COHERENT_DMA)
+  message(STATUS " Architecture NOT able to do coherent DMA (check libibverbs/arch.h) some providers disabled!")
+endif()
 if (NOT HAVE_VALGRIND_MEMCHECK)
   message(STATUS " Valgrind memcheck.h NOT enabled")
 endif()
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH rdma-core 3/4] Provide cmake support to enable C11 stdatomic.h
       [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2017-01-10 19:02   ` [PATCH rdma-core 1/4] Minimize the places where infiniband/arch.h is included Jason Gunthorpe
  2017-01-10 19:02   ` [PATCH rdma-core 2/4] Detect if infiniband/arch.h is supported by the compiler Jason Gunthorpe
@ 2017-01-10 19:02   ` Jason Gunthorpe
  2017-01-10 19:02   ` [PATCH rdma-core 4/4] Use C11 atomics instead of wmb/rmb macros for CPU-only atomics Jason Gunthorpe
  3 siblings, 0 replies; 6+ messages in thread
From: Jason Gunthorpe @ 2017-01-10 19:02 UTC (permalink / raw)
  To: Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA

Turn on C11 support in the compiler; for newer compilers this brings
in the necessary semantics. This flag started working as of gcc 4.7
(released 2012).

gcc 4.7 and 4.8 do not have complete C11 support and require compat for
stdatomic.h, use a ported FreeBSD header (courtesy of
https://gist.github.com/nhatminhle/5181506) to provide enough stdatomic.h
support on some older compilers.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 CMakeLists.txt                     |   7 +
 buildlib/RDMA_EnableCStd.cmake     |  11 +-
 buildlib/fixup-include/stdatomic.h | 369 +++++++++++++++++++++++++++++++++++++
 3 files changed, 382 insertions(+), 5 deletions(-)
 create mode 100644 buildlib/fixup-include/stdatomic.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8cc76d4a9f3c8..d06cb71d3edf03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,10 @@ CHECK_C_SOURCE_COMPILES("
   HAVE_FUNC_ATTRIBUTE_ALWAYS_INLINE
   FAIL_REGEX "warning")
 
+# Provide a shim if C11 stdatomic.h is not supported.
+CHECK_INCLUDE_FILE("stdatomic.h" HAVE_STDATOMIC)
+RDMA_DoFixup("${HAVE_STDATOMIC}" "stdatomic.h")
+
 # Enable development support features
 # Prune unneeded shared libraries during linking
 RDMA_AddOptLDFlag(CMAKE_EXE_LINKER_FLAGS SUPPORTS_AS_NEEDED "-Wl,--as-needed")
@@ -409,6 +413,9 @@ endif()
 if (NOT HAVE_COHERENT_DMA)
   message(STATUS " Architecture NOT able to do coherent DMA (check libibverbs/arch.h) some providers disabled!")
 endif()
+if (NOT HAVE_STDATOMIC)
+  message(STATUS " C11 stdatomic.h NOT available (old compiler)")
+endif()
 if (NOT HAVE_VALGRIND_MEMCHECK)
   message(STATUS " Valgrind memcheck.h NOT enabled")
 endif()
diff --git a/buildlib/RDMA_EnableCStd.cmake b/buildlib/RDMA_EnableCStd.cmake
index bcf3ea525c7307..abe613cb01486e 100644
--- a/buildlib/RDMA_EnableCStd.cmake
+++ b/buildlib/RDMA_EnableCStd.cmake
@@ -33,16 +33,17 @@ function(RDMA_AddOptCFlag TO_VAR CACHE_VAR FLAG)
   endif()
 endfunction()
 
-# Enable the minimum required gnu99 standard in the compiler.
+# Enable the minimum required gnu11 standard in the compiler
+# This was introduced in GCC 4.7
 function(RDMA_EnableCStd)
   if (CMAKE_VERSION VERSION_LESS "3.1")
     # Check for support of the usual flag
-    CHECK_C_COMPILER_FLAG("-std=gnu99" SUPPORTS_GNU99)
-    if (SUPPORTS_GNU99)
-      SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99" PARENT_SCOPE)
+    CHECK_C_COMPILER_FLAG("-std=gnu11" SUPPORTS_GNU11)
+    if (SUPPORTS_GNU11)
+      SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu11" PARENT_SCOPE)
     endif()
   else()
     # Newer cmake can do this internally
-    set(CMAKE_C_STANDARD 99 PARENT_SCOPE)
+    set(CMAKE_C_STANDARD 11 PARENT_SCOPE)
   endif()
 endfunction()
diff --git a/buildlib/fixup-include/stdatomic.h b/buildlib/fixup-include/stdatomic.h
new file mode 100644
index 00000000000000..6af810f7fb3a26
--- /dev/null
+++ b/buildlib/fixup-include/stdatomic.h
@@ -0,0 +1,369 @@
+/*
+ * An implementation of C11 stdatomic.h directly borrowed from FreeBSD
+ * (original copyright follows), with minor modifications for
+ * portability to other systems. Works for recent Clang (that
+ * implement the feature c_atomic) and GCC 4.7+; includes
+ * compatibility for GCC below 4.7 but I wouldn't recommend it.
+ *
+ * Caveats and limitations:
+ * - Only the ``_Atomic parentheses'' notation is implemented, while
+ *   the ``_Atomic space'' one is not.
+ * - _Atomic types must be typedef'ed, or programs using them will
+ *   not type check correctly (incompatible anonymous structure
+ *   types).
+ * - Non-scalar _Atomic types would require runtime support for
+ *   runtime locking, which, as far as I know, is not currently
+ *   available on any system.
+ */
+
+/*-
+ * Copyright (c) 2011 Ed Schouten <ed-HZy0K5TPuP5AfugRpC6u6w@public.gmane.org>
+ *                    David Chisnall <theraven-HZy0K5TPuP5AfugRpC6u6w@public.gmane.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/include/stdatomic.h,v 1.10.2.2 2012/05/30 19:21:54 theraven Exp $
+ */
+
+#ifndef _STDATOMIC_H_
+#define	_STDATOMIC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if !defined(__has_builtin)
+#define __has_builtin(x) 0
+#endif
+#if !defined(__GNUC_PREREQ__)
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define __GNUC_PREREQ__(maj, min)					\
+	((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+#define __GNUC_PREREQ__(maj, min) 0
+#endif
+#endif
+
+#if !defined(__CLANG_ATOMICS) && !defined(__GNUC_ATOMICS)
+#if __has_feature(c_atomic)
+#define	__CLANG_ATOMICS
+#elif __GNUC_PREREQ__(4, 7)
+#define	__GNUC_ATOMICS
+#elif !defined(__GNUC__)
+#error "stdatomic.h does not support your compiler"
+#endif
+#endif
+
+#if !defined(__CLANG_ATOMICS)
+#define	_Atomic(T)			struct { volatile __typeof__(T) __val; }
+#endif
+
+/*
+ * 7.17.2 Initialization.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	ATOMIC_VAR_INIT(value)		(value)
+#define	atomic_init(obj, value)		__c11_atomic_init(obj, value)
+#else
+#define	ATOMIC_VAR_INIT(value)		{ .__val = (value) }
+#define	atomic_init(obj, value) do {					\
+	(obj)->__val = (value);						\
+} while (0)
+#endif
+
+/*
+ * Clang and recent GCC both provide predefined macros for the memory
+ * orderings.  If we are using a compiler that doesn't define them, use the
+ * clang values - these will be ignored in the fallback path.
+ */
+
+#ifndef __ATOMIC_RELAXED
+#define __ATOMIC_RELAXED		0
+#endif
+#ifndef __ATOMIC_CONSUME
+#define __ATOMIC_CONSUME		1
+#endif
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE		2
+#endif
+#ifndef __ATOMIC_RELEASE
+#define __ATOMIC_RELEASE		3
+#endif
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL		4
+#endif
+#ifndef __ATOMIC_SEQ_CST
+#define __ATOMIC_SEQ_CST		5
+#endif
+
+/*
+ * 7.17.3 Order and consistency.
+ *
+ * The memory_order_* constants that denote the barrier behaviour of the
+ * atomic operations.
+ */
+
+enum memory_order {
+	memory_order_relaxed = __ATOMIC_RELAXED,
+	memory_order_consume = __ATOMIC_CONSUME,
+	memory_order_acquire = __ATOMIC_ACQUIRE,
+	memory_order_release = __ATOMIC_RELEASE,
+	memory_order_acq_rel = __ATOMIC_ACQ_REL,
+	memory_order_seq_cst = __ATOMIC_SEQ_CST
+};
+
+typedef enum memory_order memory_order;
+
+/*
+ * 7.17.4 Fences.
+ */
+
+#ifdef __CLANG_ATOMICS
+#define	atomic_thread_fence(order)	__c11_atomic_thread_fence(order)
+#define	atomic_signal_fence(order)	__c11_atomic_signal_fence(order)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_thread_fence(order)	__atomic_thread_fence(order)
+#define	atomic_signal_fence(order)	__atomic_signal_fence(order)
+#else
+#define	atomic_thread_fence(order)	__sync_synchronize()
+#define	atomic_signal_fence(order)	__asm volatile ("" : : : "memory")
+#endif
+
+/*
+ * 7.17.5 Lock-free property.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__c11_atomic_is_lock_free(sizeof(obj))
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_is_lock_free(obj) \
+	__atomic_is_lock_free(sizeof((obj)->__val))
+#else
+#define	atomic_is_lock_free(obj) \
+	(sizeof((obj)->__val) <= sizeof(void *))
+#endif
+
+/*
+ * 7.17.6 Atomic integer types.
+ */
+
+typedef _Atomic(_Bool)			atomic_bool;
+typedef _Atomic(char)			atomic_char;
+typedef _Atomic(signed char)		atomic_schar;
+typedef _Atomic(unsigned char)		atomic_uchar;
+typedef _Atomic(short)			atomic_short;
+typedef _Atomic(unsigned short)		atomic_ushort;
+typedef _Atomic(int)			atomic_int;
+typedef _Atomic(unsigned int)		atomic_uint;
+typedef _Atomic(long)			atomic_long;
+typedef _Atomic(unsigned long)		atomic_ulong;
+typedef _Atomic(long long)		atomic_llong;
+typedef _Atomic(unsigned long long)	atomic_ullong;
+#if 0
+typedef _Atomic(char16_t)		atomic_char16_t;
+typedef _Atomic(char32_t)		atomic_char32_t;
+#endif
+typedef _Atomic(wchar_t)		atomic_wchar_t;
+typedef _Atomic(int_least8_t)		atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)		atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)		atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)		atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)		atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)		atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)		atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)		atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)		atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)		atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)		atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)		atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)		atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)		atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)		atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)		atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)		atomic_intptr_t;
+typedef _Atomic(uintptr_t)		atomic_uintptr_t;
+typedef _Atomic(size_t)			atomic_size_t;
+typedef _Atomic(ptrdiff_t)		atomic_ptrdiff_t;
+typedef _Atomic(intmax_t)		atomic_intmax_t;
+typedef _Atomic(uintmax_t)		atomic_uintmax_t;
+
+/*
+ * 7.17.7 Operations on atomic types.
+ */
+
+/*
+ * Compiler-specific operations.
+ */
+
+#if defined(__CLANG_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_strong(object, expected, desired,	\
+	    success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__c11_atomic_compare_exchange_weak(object, expected, desired,	\
+	    success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__c11_atomic_exchange(object, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__c11_atomic_fetch_add(object, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__c11_atomic_fetch_and(object, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__c11_atomic_fetch_or(object, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__c11_atomic_fetch_sub(object, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__c11_atomic_fetch_xor(object, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__c11_atomic_load(object, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__c11_atomic_store(object, desired, order)
+#elif defined(__GNUC_ATOMICS)
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 0, success, failure)
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	__atomic_compare_exchange_n(&(object)->__val, expected,		\
+	    desired, 1, success, failure)
+#define	atomic_exchange_explicit(object, desired, order)		\
+	__atomic_exchange_n(&(object)->__val, desired, order)
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__atomic_fetch_add(&(object)->__val, operand, order)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__atomic_fetch_and(&(object)->__val, operand, order)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__atomic_fetch_or(&(object)->__val, operand, order)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__atomic_fetch_sub(&(object)->__val, operand, order)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__atomic_fetch_xor(&(object)->__val, operand, order)
+#define	atomic_load_explicit(object, order)				\
+	__atomic_load_n(&(object)->__val, order)
+#define	atomic_store_explicit(object, desired, order)			\
+	__atomic_store_n(&(object)->__val, desired, order)
+#else
+#define	atomic_compare_exchange_strong_explicit(object, expected,	\
+    desired, success, failure) ({					\
+	__typeof__((object)->__val) __v;				\
+	_Bool __r;							\
+	__v = __sync_val_compare_and_swap(&(object)->__val,		\
+	    *(expected), desired);					\
+	__r = *(expected) == __v;					\
+	*(expected) = __v;						\
+	__r;								\
+})
+
+#define	atomic_compare_exchange_weak_explicit(object, expected,		\
+    desired, success, failure)						\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+		desired, success, failure)
+#if __has_builtin(__sync_swap)
+/* Clang provides a full-barrier atomic exchange - use it if available. */
+#define atomic_exchange_explicit(object, desired, order)		\
+	__sync_swap(&(object)->__val, desired)
+#else
+/*
+ * __sync_lock_test_and_set() is only an acquire barrier in theory (although in
+ * practice it is usually a full barrier) so we need an explicit barrier after
+ * it.
+ */
+#define	atomic_exchange_explicit(object, desired, order) ({		\
+	__typeof__((object)->__val) __v;				\
+	__v = __sync_lock_test_and_set(&(object)->__val, desired);	\
+	__sync_synchronize();						\
+	__v;								\
+})
+#endif
+#define	atomic_fetch_add_explicit(object, operand, order)		\
+	__sync_fetch_and_add(&(object)->__val, operand)
+#define	atomic_fetch_and_explicit(object, operand, order)		\
+	__sync_fetch_and_and(&(object)->__val, operand)
+#define	atomic_fetch_or_explicit(object, operand, order)		\
+	__sync_fetch_and_or(&(object)->__val, operand)
+#define	atomic_fetch_sub_explicit(object, operand, order)		\
+	__sync_fetch_and_sub(&(object)->__val, operand)
+#define	atomic_fetch_xor_explicit(object, operand, order)		\
+	__sync_fetch_and_xor(&(object)->__val, operand)
+#define	atomic_load_explicit(object, order)				\
+	__sync_fetch_and_add(&(object)->__val, 0)
+#define	atomic_store_explicit(object, desired, order) do {		\
+	__sync_synchronize();						\
+	(object)->__val = (desired);					\
+	__sync_synchronize();						\
+} while (0)
+#endif
+
+/*
+ * Convenience functions.
+ */
+
+#define	atomic_compare_exchange_strong(object, expected, desired)	\
+	atomic_compare_exchange_strong_explicit(object, expected,	\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_compare_exchange_weak(object, expected, desired)		\
+	atomic_compare_exchange_weak_explicit(object, expected,		\
+	    desired, memory_order_seq_cst, memory_order_seq_cst)
+#define	atomic_exchange(object, desired)				\
+	atomic_exchange_explicit(object, desired, memory_order_seq_cst)
+#define	atomic_fetch_add(object, operand)				\
+	atomic_fetch_add_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_and(object, operand)				\
+	atomic_fetch_and_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_or(object, operand)				\
+	atomic_fetch_or_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_sub(object, operand)				\
+	atomic_fetch_sub_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_fetch_xor(object, operand)				\
+	atomic_fetch_xor_explicit(object, operand, memory_order_seq_cst)
+#define	atomic_load(object)						\
+	atomic_load_explicit(object, memory_order_seq_cst)
+#define	atomic_store(object, desired)					\
+	atomic_store_explicit(object, desired, memory_order_seq_cst)
+
+/*
+ * 7.17.8 Atomic flag type and operations.
+ */
+
+typedef atomic_bool			atomic_flag;
+
+#define	ATOMIC_FLAG_INIT		ATOMIC_VAR_INIT(0)
+
+#define	atomic_flag_clear_explicit(object, order)			\
+	atomic_store_explicit(object, 0, order)
+#define	atomic_flag_test_and_set_explicit(object, order)		\
+	atomic_compare_exchange_strong_explicit(object, 0, 1, order, order)
+
+#define	atomic_flag_clear(object)					\
+	atomic_flag_clear_explicit(object, memory_order_seq_cst)
+#define	atomic_flag_test_and_set(object)				\
+	atomic_flag_test_and_set_explicit(object, memory_order_seq_cst)
+
+#endif /* !_STDATOMIC_H_ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH rdma-core 4/4] Use C11 atomics instead of wmb/rmb macros for CPU-only atomics
       [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-01-10 19:02   ` [PATCH rdma-core 3/4] Provide cmake support to enable C11 stdatomic.h Jason Gunthorpe
@ 2017-01-10 19:02   ` Jason Gunthorpe
  3 siblings, 0 replies; 6+ messages in thread
From: Jason Gunthorpe @ 2017-01-10 19:02 UTC (permalink / raw)
  To: Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Mike Marciniszyn, Dennis Dalessandro, Moni Shoua,
	moderated list:HF1 USERSPACE PROVIDER for hf1.ko,
	open list:IPATH/QIB USERSPACE PROVIDER for ib_qib.ko

ipath/hfi1 and rxe synchronize with the kernel (via a shared mmap) and can
safely use the weaker SMP memory model atomics to do it; they do not need
the PCI barriers from arch.h.

This allows those providers to compile on all arches.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 CMakeLists.txt                    |  5 +--
 providers/hfi1verbs/hfiverbs.h    | 10 +++---
 providers/hfi1verbs/verbs.c       | 17 ++++-----
 providers/ipathverbs/ipathverbs.h | 10 +++---
 providers/ipathverbs/verbs.c      | 18 +++++-----
 providers/rxe/rxe.c               |  5 +--
 providers/rxe/rxe_queue.h         | 72 ++++++++++++++++++++-------------------
 7 files changed, 70 insertions(+), 67 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d06cb71d3edf03..849d3936d86508 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -376,16 +376,17 @@ add_subdirectory(providers/cxgb4)
 add_subdirectory(providers/hfi1verbs)
 add_subdirectory(providers/hns)
 add_subdirectory(providers/i40iw)
-add_subdirectory(providers/ipathverbs)
 add_subdirectory(providers/mlx4)
 add_subdirectory(providers/mlx5)
 add_subdirectory(providers/mthca)
 add_subdirectory(providers/nes)
 add_subdirectory(providers/ocrdma)
 add_subdirectory(providers/qedr)
+endif()
+
+add_subdirectory(providers/ipathverbs)
 add_subdirectory(providers/rxe)
 add_subdirectory(providers/rxe/man)
-endif()
 
 # Binaries
 add_subdirectory(ibacm)
diff --git a/providers/hfi1verbs/hfiverbs.h b/providers/hfi1verbs/hfiverbs.h
index e82ba2b469ab05..af93dc43e059d9 100644
--- a/providers/hfi1verbs/hfiverbs.h
+++ b/providers/hfi1verbs/hfiverbs.h
@@ -62,9 +62,9 @@
 #include <byteswap.h>
 #include <pthread.h>
 #include <stddef.h>
+#include <stdatomic.h>
 
 #include <infiniband/driver.h>
-#include <infiniband/arch.h>
 #include <infiniband/verbs.h>
 
 #define PFX		"hfi1: "
@@ -100,8 +100,8 @@ struct hfi1_wc {
 };
 
 struct hfi1_cq_wc {
-	uint32_t		head;
-	uint32_t		tail;
+	_Atomic(uint32_t)	head;
+	_Atomic(uint32_t)	tail;
 	struct hfi1_wc		queue[1];
 };
 
@@ -132,8 +132,8 @@ struct hfi1_rwqe {
  * use get_rwqe_ptr() instead.
  */
 struct hfi1_rwq {
-	uint32_t		head;	/* new requests posted to the head */
-	uint32_t		tail;	/* receives pull requests from here. */
+	_Atomic(uint32_t)	head;	/* new requests posted to the head. */
+	_Atomic(uint32_t)	tail;	/* receives pull requests from here. */
 	struct hfi1_rwqe	wq[0];
 };
 
diff --git a/providers/hfi1verbs/verbs.c b/providers/hfi1verbs/verbs.c
index 06ddbb712857f6..8d1b11150977a3 100644
--- a/providers/hfi1verbs/verbs.c
+++ b/providers/hfi1verbs/verbs.c
@@ -298,19 +298,19 @@ int hfi1_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
 
 	pthread_spin_lock(&cq->lock);
 	q = cq->queue;
-	tail = q->tail;
+	tail = atomic_load_explicit(&q->tail, memory_order_relaxed);
 	for (npolled = 0; npolled < ne; ++npolled, ++wc) {
-		if (tail == q->head)
+		if (tail == atomic_load(&q->head))
 			break;
 		/* Make sure entry is read after head index is read. */
-		rmb();
+		atomic_thread_fence(memory_order_acquire);
 		memcpy(wc, &q->queue[tail], sizeof(*wc));
 		if (tail == cq->ibv_cq.cqe)
 			tail = 0;
 		else
 			tail++;
 	}
-	q->tail = tail;
+	atomic_store(&q->tail, tail);
 	pthread_spin_unlock(&cq->lock);
 
 	return npolled;
@@ -478,7 +478,7 @@ static int post_recv(struct hfi1_rq *rq, struct ibv_recv_wr *wr,
 
 	pthread_spin_lock(&rq->lock);
 	rwq = rq->rwq;
-	head = rwq->head;
+	head = atomic_load_explicit(&rwq->head, memory_order_relaxed);;
 	for (i = wr; i; i = i->next) {
 		if ((unsigned) i->num_sge > rq->max_sge) {
 			ret = EINVAL;
@@ -487,7 +487,7 @@ static int post_recv(struct hfi1_rq *rq, struct ibv_recv_wr *wr,
 		wqe = get_rwqe_ptr(rq, head);
 		if (++head >= rq->size)
 			head = 0;
-		if (head == rwq->tail) {
+		if (head == atomic_load(&rwq->tail)) {
 			ret = ENOMEM;
 			goto bad;
 		}
@@ -495,9 +495,10 @@ static int post_recv(struct hfi1_rq *rq, struct ibv_recv_wr *wr,
 		wqe->num_sge = i->num_sge;
 		for (n = 0; n < wqe->num_sge; n++)
 			wqe->sg_list[n] = i->sg_list[n];
+
 		/* Make sure queue entry is written before the head index. */
-		wmb();
-		rwq->head = head;
+		atomic_thread_fence(memory_order_release);
+		atomic_store(&rwq->head, head);
 	}
 	ret = 0;
 	goto done;
diff --git a/providers/ipathverbs/ipathverbs.h b/providers/ipathverbs/ipathverbs.h
index ff25854519fdc7..ebd2dcfbc413a9 100644
--- a/providers/ipathverbs/ipathverbs.h
+++ b/providers/ipathverbs/ipathverbs.h
@@ -42,9 +42,9 @@
 #include <byteswap.h>
 #include <pthread.h>
 #include <stddef.h>
+#include <stdatomic.h>
 
 #include <infiniband/driver.h>
-#include <infiniband/arch.h>
 #include <infiniband/verbs.h>
 
 #define PFX		"ipath: "
@@ -80,8 +80,8 @@ struct ipath_wc {
 };
 
 struct ipath_cq_wc {
-	uint32_t		head;
-	uint32_t		tail;
+	_Atomic(uint32_t)	head;
+	_Atomic(uint32_t)	tail;
 	struct ipath_wc		queue[1];
 };
 
@@ -112,8 +112,8 @@ struct ipath_rwqe {
  * use get_rwqe_ptr() instead.
  */
 struct ipath_rwq {
-	uint32_t		head;	/* new requests posted to the head */
-	uint32_t		tail;	/* receives pull requests from here. */
+	_Atomic(uint32_t)	head;	/* new requests posted to the head. */
+	_Atomic(uint32_t)	tail;	/* receives pull requests from here. */
 	struct ipath_rwqe	wq[0];
 };
 
diff --git a/providers/ipathverbs/verbs.c b/providers/ipathverbs/verbs.c
index 35b2162a84f0ae..b8cd46bd3c892d 100644
--- a/providers/ipathverbs/verbs.c
+++ b/providers/ipathverbs/verbs.c
@@ -275,19 +275,20 @@ int ipath_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
 
 	pthread_spin_lock(&cq->lock);
 	q = cq->queue;
-	tail = q->tail;
+	tail = atomic_load_explicit(&q->tail, memory_order_relaxed);
 	for (npolled = 0; npolled < ne; ++npolled, ++wc) {
-		if (tail == q->head)
+		if (tail == atomic_load(&q->head))
 			break;
+
 		/* Make sure entry is read after head index is read. */
-		rmb();
+		atomic_thread_fence(memory_order_acquire);
 		memcpy(wc, &q->queue[tail], sizeof(*wc));
 		if (tail == cq->ibv_cq.cqe)
 			tail = 0;
 		else
 			tail++;
 	}
-	q->tail = tail;
+	atomic_store(&q->tail, tail);
 	pthread_spin_unlock(&cq->lock);
 
 	return npolled;
@@ -454,7 +455,7 @@ static int post_recv(struct ipath_rq *rq, struct ibv_recv_wr *wr,
 
 	pthread_spin_lock(&rq->lock);
 	rwq = rq->rwq;
-	head = rwq->head;
+	head = atomic_load_explicit(&rwq->head, memory_order_relaxed);;
 	for (i = wr; i; i = i->next) {
 		if ((unsigned) i->num_sge > rq->max_sge) {
 			ret = EINVAL;
@@ -463,7 +464,7 @@ static int post_recv(struct ipath_rq *rq, struct ibv_recv_wr *wr,
 		wqe = get_rwqe_ptr(rq, head);
 		if (++head >= rq->size)
 			head = 0;
-		if (head == rwq->tail) {
+		if (head == atomic_load(&rwq->tail)) {
 			ret = ENOMEM;
 			goto bad;
 		}
@@ -471,9 +472,10 @@ static int post_recv(struct ipath_rq *rq, struct ibv_recv_wr *wr,
 		wqe->num_sge = i->num_sge;
 		for (n = 0; n < wqe->num_sge; n++)
 			wqe->sg_list[n] = i->sg_list[n];
+
 		/* Make sure queue entry is written before the head index. */
-		wmb();
-		rwq->head = head;
+		atomic_thread_fence(memory_order_release);
+		atomic_store(&rwq->head, head);
 	}
 	ret = 0;
 	goto done;
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index d23ef3d6b85cf9..a76c74b04ced8e 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -50,7 +50,6 @@
 #include <stddef.h>
 
 #include <infiniband/driver.h>
-#include <infiniband/arch.h>
 #include <infiniband/verbs.h>
 #include <rdma/rdma_user_rxe.h>
 
@@ -255,7 +254,7 @@ static int rxe_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
 		if (queue_empty(q))
 			break;
 
-		rmb();
+		atomic_thread_fence(memory_order_acquire);
 		src = consumer_addr(q);
 		memcpy(wc, src, sizeof(*wc));
 		advance_consumer(q);
@@ -402,8 +401,6 @@ static int rxe_post_one_recv(struct rxe_wq *rq, struct ibv_recv_wr *recv_wr)
 	wqe->dma.num_sge = wqe->num_sge;
 	wqe->dma.sge_offset = 0;
 
-	rmb();
-
 	advance_producer(q);
 
 out:
diff --git a/providers/rxe/rxe_queue.h b/providers/rxe/rxe_queue.h
index e9794727913461..629aad98f914aa 100644
--- a/providers/rxe/rxe_queue.h
+++ b/providers/rxe/rxe_queue.h
@@ -37,15 +37,16 @@
 #ifndef H_RXE_PCQ
 #define H_RXE_PCQ
 
+#include <stdatomic.h>
+
 /* MUST MATCH kernel struct rxe_pqc in rxe_queue.h */
 struct rxe_queue {
 	uint32_t		log2_elem_size;
 	uint32_t		index_mask;
 	uint32_t		pad_1[30];
-	volatile uint32_t	producer_index;
+	_Atomic(uint32_t)	producer_index;
 	uint32_t		pad_2[31];
-	volatile uint32_t	consumer_index;
-	uint32_t		pad_3[31];
+	_Atomic(uint32_t)	consumer_index;
 	uint8_t			data[0];
 };
 
@@ -56,48 +57,59 @@ static inline int next_index(struct rxe_queue *q, int index)
 
 static inline int queue_empty(struct rxe_queue *q)
 {
-	return ((q->producer_index - q->consumer_index)
-			& q->index_mask) == 0;
+	/* Must hold consumer_index lock */
+	return ((atomic_load(&q->producer_index) -
+		 atomic_load_explicit(&q->consumer_index,
+				      memory_order_relaxed)) &
+		q->index_mask) == 0;
 }
 
 static inline int queue_full(struct rxe_queue *q)
 {
-	return ((q->producer_index + 1 - q->consumer_index)
-			& q->index_mask) == 0;
+	/* Must hold producer_index lock */
+	return ((atomic_load_explicit(&q->producer_index,
+				      memory_order_relaxed) +
+		 1 - atomic_load(&q->consumer_index)) &
+		q->index_mask) == 0;
 }
 
 static inline void advance_producer(struct rxe_queue *q)
 {
-	q->producer_index = (q->producer_index + 1)
-			& q->index_mask;
+	/* Must hold producer_index lock */
+	atomic_thread_fence(memory_order_release);
+	atomic_store(
+	    &q->producer_index,
+	    (atomic_load_explicit(&q->producer_index, memory_order_relaxed) +
+	     1) &
+		q->index_mask);
 }
 
 static inline void advance_consumer(struct rxe_queue *q)
 {
-	q->consumer_index = (q->consumer_index + 1)
-			& q->index_mask;
+	/* Must hold consumer_index lock */
+	atomic_store(
+	    &q->consumer_index,
+	    (atomic_load_explicit(&q->consumer_index, memory_order_relaxed) +
+	     1) &
+		q->index_mask);
 }
 
 static inline void *producer_addr(struct rxe_queue *q)
 {
-	return q->data + ((q->producer_index & q->index_mask)
-				<< q->log2_elem_size);
+	/* Must hold producer_index lock */
+	return q->data + ((atomic_load_explicit(&q->producer_index,
+						memory_order_relaxed) &
+			   q->index_mask)
+			  << q->log2_elem_size);
 }
 
 static inline void *consumer_addr(struct rxe_queue *q)
 {
-	return q->data + ((q->consumer_index & q->index_mask)
-				<< q->log2_elem_size);
-}
-
-static inline unsigned int producer_index(struct rxe_queue *q)
-{
-	return q->producer_index;
-}
-
-static inline unsigned int consumer_index(struct rxe_queue *q)
-{
-	return q->consumer_index;
+	/* Must hold consumer_index lock */
+	return q->data + ((atomic_load_explicit(&q->consumer_index,
+						memory_order_relaxed) &
+			   q->index_mask)
+			  << q->log2_elem_size);
 }
 
 static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
@@ -111,14 +123,4 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q, const void
 	return (((uint8_t *)addr - q->data) >> q->log2_elem_size) & q->index_mask;
 }
 
-static inline unsigned int queue_count(const struct rxe_queue *q)
-{
-	return (q->producer_index - q->consumer_index) & q->index_mask;
-}
-
-static inline void *queue_head(struct rxe_queue *q)
-{
-	return queue_empty(q) ? NULL : consumer_addr(q);
-}
-
 #endif /* H_RXE_PCQ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH rdma-core 1/4] Minimize the places where infiniband/arch.h is included
       [not found]     ` <1484074931-3847-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2017-01-10 19:49       ` Hal Rosenstock
  0 siblings, 0 replies; 6+ messages in thread
From: Hal Rosenstock @ 2017-01-10 19:49 UTC (permalink / raw)
  To: Jason Gunthorpe, Jarod Wilson, linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Doug Ledford, Yishai Hadas, Sean Hefty, Bart Van Assche

On 1/10/2017 2:02 PM, Jason Gunthorpe wrote:
> We never want to use it for htonll as this header can only be
> compiled on architectures that define the PCI memory barriers.
> Instead use byteswap.h directly.
> 
> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> ---
>  ibacm/linux/osd.h                 | 1 -
>  ibacm/src/libacm.c                | 6 +++---
>  libibumad/sysfs.c                 | 3 +--
>  libibverbs/device.c               | 4 +---
>  libibverbs/examples/device_list.c | 3 +--
>  libibverbs/examples/devinfo.c     | 3 +--
>  librdmacm/addrinfo.c              | 6 +++---
>  librdmacm/cma.c                   | 6 +++---
>  librdmacm/cma.h                   | 1 -
>  librdmacm/examples/rping.c        | 7 +++----
>  librdmacm/rsocket.c               | 8 ++++----
>  srp_daemon/srp_daemon.h           | 1 -
>  12 files changed, 20 insertions(+), 29 deletions(-)

For the libibumad piece:

Reviewed-by: Hal Rosenstock <hal-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-01-10 19:49 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-10 19:02 [PATCH rdma-core 0/4] Support compiling on ARM32 and others Jason Gunthorpe
     [not found] ` <1484074931-3847-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-01-10 19:02   ` [PATCH rdma-core 1/4] Minimize the places where infiniband/arch.h is included Jason Gunthorpe
     [not found]     ` <1484074931-3847-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2017-01-10 19:49       ` Hal Rosenstock
2017-01-10 19:02   ` [PATCH rdma-core 2/4] Detect if infiniband/arch.h is supported by the compiler Jason Gunthorpe
2017-01-10 19:02   ` [PATCH rdma-core 3/4] Provide cmake support to enable C11 stdatomic.h Jason Gunthorpe
2017-01-10 19:02   ` [PATCH rdma-core 4/4] Use C11 atomics instead of wmb/rmb macros for CPU-only atomics Jason Gunthorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.