nvdimm.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [ndctl PATCH] ndctl, test: rdma vs dax
@ 2017-10-07 15:14 Dan Williams
  2017-10-09  8:07 ` Johannes Thumshirn
  0 siblings, 1 reply; 4+ messages in thread
From: Dan Williams @ 2017-10-07 15:14 UTC (permalink / raw)
  To: linux-nvdimm

Use the rxe (Soft-ROCE) driver to unit test the DAX paths in ibverbs
memory registration (ib_umem_get).

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 configure.ac     |   11 +++
 test/Makefile.am |   13 +++
 test/rdma.c      |  224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/rdma.sh     |   54 +++++++++++++
 4 files changed, 302 insertions(+)
 create mode 100644 test/rdma.c
 create mode 100755 test/rdma.sh

diff --git a/configure.ac b/configure.ac
index 5b103813ee6f..087df2f7b3a6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -94,6 +94,17 @@ PKG_CHECK_MODULES([UDEV], [libudev])
 PKG_CHECK_MODULES([UUID], [uuid])
 PKG_CHECK_MODULES([JSON], [json-c])
 
+# Optional RDMA support: --with-libibverbs defaults to "no". When it is
+# "yes", configure aborts unless ibv_get_device_list is found in
+# libibverbs, and ENABLE_RDMA is defined for the preprocessor.
+AC_ARG_WITH([libibverbs],
+	AS_HELP_STRING([--with-libibverbs],
+		       [Enable RDMA functionality. @<:@default=no@:>@]),
+	[], [with_libibverbs=no])
+if test "x$with_libibverbs" = "xyes"; then
+	AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
+		AC_MSG_ERROR([libibverbs not found.]))
+	AC_DEFINE(ENABLE_RDMA, 1, [Enable RDMA])
+fi
+# Automake conditional of the same name, true only for --with-libibverbs=yes.
+AM_CONDITIONAL([ENABLE_RDMA], [test "x$with_libibverbs" = "xyes"])
+
 AC_ARG_WITH([libpmem],
 	AS_HELP_STRING([--with-libpmem],
 		       [Install with libpmem support. @<:@default=no@:>@]),
diff --git a/test/Makefile.am b/test/Makefile.am
index 9223628b2608..0be0d0ab8828 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -42,6 +42,11 @@ check_PROGRAMS +=\
 	dax-pmd \
 	device-dax \
 	mmap
+
+if ENABLE_RDMA
+TESTS += rdma.sh
+check_PROGRAMS += rdma
+endif
 endif
 
 LIBNDCTL_LIB =\
@@ -110,3 +115,11 @@ multi_pmem_LDADD = \
 		$(UUID_LIBS) \
 		$(KMOD_LIBS) \
 		../libutil.a
+
+# rdma: ibverbs memory registration test, built from rdma.c plus the
+# shared $(testcore) sources.
+rdma_SOURCES =\
+	      rdma.c \
+	      $(testcore)
+
+# Every line but the last needs a trailing backslash, otherwise
+# -libverbs is not part of the rdma_LDADD assignment.
+rdma_LDADD = \
+	     $(LIBNDCTL_LIB) \
+	     -libverbs
diff --git a/test/rdma.c b/test/rdma.c
new file mode 100644
index 000000000000..043483272162
--- /dev/null
+++ b/test/rdma.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2014-2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <limits.h>
+#include <syslog.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+#include <util/size.h>
+#include <ndctl/libndctl.h>
+#include <infiniband/verbs.h>
+#include <ccan/array_size/array_size.h>
+
+/*
+ * create_qp - create a reliable-connected (IBV_QPT_RC) queue pair
+ * @pd: protection domain the queue pair is created in
+ * @cq: completion queue used for both send and receive completions
+ *
+ * Requests one send and one receive work request with one scatter/gather
+ * entry each. The capabilities are re-checked after ibv_create_qp()
+ * returns; if any of them is below 1 the queue pair is destroyed again.
+ *
+ * Returns the new queue pair, or NULL on failure.
+ */
+static struct ibv_qp *create_qp(struct ibv_pd *pd, struct ibv_cq *cq)
+{
+	struct ibv_qp_init_attr init = {
+		.qp_type = IBV_QPT_RC,
+		.send_cq = cq,
+		.recv_cq = cq,
+		.cap = {
+			.max_send_wr = 1,
+			.max_recv_wr = 1,
+			.max_send_sge = 1,
+			.max_recv_sge = 1,
+		},
+	};
+	struct ibv_qp *qp = ibv_create_qp(pd, &init);
+
+	if (!qp)
+		return NULL;
+
+	/* all four capabilities must be at least what was asked for */
+	if (init.cap.max_send_wr >= 1 && init.cap.max_recv_wr >= 1
+			&& init.cap.max_send_sge >= 1
+			&& init.cap.max_recv_sge >= 1)
+		return qp;
+
+	fprintf(stderr, "%s: insufficient queue pair capabilities\n",
+			__func__);
+	ibv_destroy_qp(qp);
+	return NULL;
+}
+
+/*
+ * post_recv - post one receive work request covering a buffer
+ * @qp: queue pair to post the receive on
+ * @mr: memory region supplying the local key for the buffer
+ * @addr: start of the receive buffer
+ * @len: length of the receive buffer in bytes
+ *
+ * Returns the result of ibv_post_recv().
+ */
+static int post_recv(struct ibv_qp *qp, struct ibv_mr *mr, void *addr,
+		size_t len)
+{
+	/* single scatter/gather element describing the whole buffer */
+	struct ibv_sge sge = {
+		.addr = (uint64_t) addr,
+		.length = len,
+		.lkey = mr->lkey,
+	};
+	struct ibv_recv_wr req = {
+		.next = NULL,
+		.sg_list = &sge,
+		.num_sge = 1,
+	};
+	struct ibv_recv_wr *bad_req;
+
+	return ibv_post_recv(qp, &req, &bad_req);
+}
+
+/*
+ * do_rdma - register a mapping of @fd with an ibverbs device and post a
+ * receive work request against it
+ * @ctx: libndctl context (not used by this function)
+ * @fd: open file descriptor to map
+ * @map_flags: flags passed through to mmap()
+ *
+ * Maps the first 4 * HPAGE_SIZE bytes of @fd, opens the first device
+ * returned by ibv_get_device_list(), then allocates a protection domain,
+ * registers the mapping as a memory region, creates a completion queue
+ * and a queue pair, and posts a single receive covering the mapping.
+ * Everything acquired is released again before returning.
+ *
+ * Returns 0 on success, -ENXIO if any setup step fails, or the non-zero
+ * result of ibv_post_recv() if posting the receive fails.
+ */
+static int do_rdma(struct ndctl_ctx *ctx, int fd, unsigned long map_flags)
+{
+	int nr_devs, rc = -ENXIO;
+	void *addr;
+	struct ibv_pd *pd;
+	struct ibv_mr *mr;
+	struct ibv_cq *cq;
+	struct ibv_qp *qp;
+	struct ibv_context *ictx;
+	size_t map_len = 4*HPAGE_SIZE;
+	struct ibv_device **idevs, *idev;
+
+	addr = mmap(NULL, map_len, PROT_READ|PROT_WRITE, map_flags, fd, 0);
+	if (addr == MAP_FAILED) {
+		fprintf(stderr, "failed to map test file\n");
+		return -ENXIO;
+	}
+
+	idevs = ibv_get_device_list(&nr_devs);
+	if (!idevs || !nr_devs) {
+		fprintf(stderr, "ibverbs device not found\n");
+		/* an empty list is still an allocation that must be freed */
+		if (idevs)
+			ibv_free_device_list(idevs);
+		goto err_dev;
+	}
+
+	/* only the first reported device is exercised */
+	idev = idevs[0];
+	ictx = ibv_open_device(idev);
+	if (ictx)
+		fprintf(stderr, "%s: opened dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+	else {
+		fprintf(stderr, "%s: failed to open dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+		goto err_open;
+	}
+
+	pd = ibv_alloc_pd(ictx);
+	if (!pd) {
+		fprintf(stderr, "%s: failed alloc_pd dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+		goto err_pd;
+	}
+
+	mr = ibv_reg_mr(pd, addr, map_len, IBV_ACCESS_LOCAL_WRITE
+			| IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
+	if (!mr) {
+		fprintf(stderr, "%s: failed reg_mr dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+		goto err_mr;
+	}
+
+	cq = ibv_create_cq(ictx, 1, NULL, NULL, 0);
+	if (!cq) {
+		fprintf(stderr, "%s: failed create_cq dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+		goto err_cq;
+	}
+
+	qp = create_qp(pd, cq);
+	/* check the queue pair itself, a NULL qp must never be posted to */
+	if (!qp) {
+		fprintf(stderr, "%s: failed create_qp dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+		goto err_qp;
+	}
+
+	rc = post_recv(qp, mr, addr, map_len);
+	if (rc) {
+		fprintf(stderr, "%s: failed post_recv (%d) dev: %s\n", __func__,
+				rc, ibv_get_device_name(idev));
+		goto err_post_recv;
+	}
+
+	fprintf(stderr, "%s: successful post_recv dev: %s\n", __func__,
+				ibv_get_device_name(idev));
+	rc = 0;
+	/* success falls through the labels: teardown in reverse order */
+err_post_recv:
+	ibv_destroy_qp(qp);
+err_qp:
+	ibv_destroy_cq(cq);
+err_cq:
+	ibv_dereg_mr(mr);
+err_mr:
+	ibv_dealloc_pd(pd);
+err_pd:
+	ibv_close_device(ictx);
+err_open:
+	ibv_free_device_list(idevs);
+err_dev:
+	munmap(addr, map_len);
+	return rc;
+}
+
+/*
+ * test_rdma - run do_rdma() once per mmap flag combination and compare
+ * the outcome against the expectation for that combination
+ * @fd: open file descriptor handed to do_rdma()
+ * @loglevel: log priority set on the libndctl context
+ *
+ * A plain MAP_SHARED mapping is expected to fail, while
+ * MAP_SHARED_VALIDATE | MAP_DIRECT is expected to succeed.
+ *
+ * Returns the negative result of ndctl_new() if the context cannot be
+ * created, EXIT_FAILURE if a case does not behave as expected, otherwise
+ * the result of the last do_rdma() call. The context reference is
+ * dropped on every path once it has been created.
+ */
+static int test_rdma(int fd, int loglevel)
+{
+	int err, i, rc = EXIT_FAILURE;
+	struct ndctl_ctx *ctx;
+	unsigned long test_flags[] = {
+		MAP_SHARED,
+		MAP_SHARED_VALIDATE | MAP_DIRECT,
+	};
+
+	err = ndctl_new(&ctx);
+	if (err < 0)
+		return err;
+
+	ndctl_set_log_priority(ctx, loglevel);
+
+	for (i = 0; i < (int) ARRAY_SIZE(test_flags); i++) {
+		unsigned long map_flags = test_flags[i];
+
+		err = do_rdma(ctx, fd, map_flags);
+		switch (map_flags) {
+		case MAP_SHARED:
+			if (err == 0) {
+				fprintf(stderr, "expected failure map_flags: %#lx\n",
+						map_flags);
+				goto out;
+			}
+			break;
+		case (MAP_SHARED_VALIDATE | MAP_DIRECT):
+			if (err != 0) {
+				fprintf(stderr, "expected success map_flags: %#lx\n",
+						map_flags);
+				goto out;
+			}
+			break;
+		default:
+			fprintf(stderr, "unhandled test case\n");
+			goto out;
+		}
+	}
+
+	rc = err;
+out:
+	/* single exit so failing cases no longer leak the context */
+	ndctl_unref(ctx);
+	return rc;
+}
+
+/*
+ * Usage: rdma <file>
+ *
+ * Opens <file> read/write and runs test_rdma() against it at LOG_DEBUG
+ * verbosity. Returns -EINVAL when no file argument is given,
+ * EXIT_FAILURE when the file cannot be opened, otherwise the result of
+ * test_rdma().
+ */
+int __attribute__((weak)) main(int argc, char *argv[])
+{
+	int rc, fd;
+
+	/* argv[1] is only valid when a path argument was supplied */
+	if (argc < 2) {
+		fprintf(stderr, "usage: rdma <file>\n");
+		return -EINVAL;
+	}
+
+	fd = open(argv[1], O_RDWR);
+	if (fd < 0) {
+		/* do not run the tests against an invalid descriptor */
+		fprintf(stderr, "failed to open %s: %s\n", argv[1],
+				strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	rc = test_rdma(fd, LOG_DEBUG);
+	close(fd);
+	return rc;
+}
diff --git a/test/rdma.sh b/test/rdma.sh
new file mode 100755
index 000000000000..3b486d7b1680
--- /dev/null
+++ b/test/rdma.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright(c) 2015-2017 Intel Corporation. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+MNT=test_dax_mnt
+FILE=image
+NDCTL="../ndctl/ndctl"
+json2var="s/[{}\",]//g; s/:/=/g"
+blockdev=""
+
+# err <line>: ERR trap handler. Reports the failing line, then either
+# sets the exit code to 77 when no block device was ever discovered, or
+# unmounts the block device and keeps the current $rc. The mount point
+# directory is removed before exiting.
+err() {
+	echo "test-rdma: failed at line $1"
+	if [ -z "$blockdev" ]; then
+		rc=77
+	else
+		umount /dev/$blockdev
+	fi
+	rmdir $MNT
+	exit $rc
+}
+
+set -e
+mkdir -p $MNT
+trap 'err $LINENO' ERR
+
+rxe_cfg stop
+rxe_cfg start
+if ! rxe_cfg status | grep -n rxe0; then
+	rxe_cfg add eth0
+fi
+
+dev=$(./dax-dev)
+json=$($NDCTL list -N -n $dev)
+eval $(echo $json | sed -e "$json2var")
+rc=1
+
+# TODO test with sparse file, and a file that needs to do unwritten
+# extent conversion
+mkfs.xfs -f /dev/$blockdev
+mount /dev/$blockdev $MNT -o dax
+dd if=/dev/zero of=$MNT/$FILE bs=1G count=1
+./rdma $MNT/$FILE
+umount $MNT
+
+exit 0

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [ndctl PATCH] ndctl, test: rdma vs dax
  2017-10-07 15:14 [ndctl PATCH] ndctl, test: rdma vs dax Dan Williams
@ 2017-10-09  8:07 ` Johannes Thumshirn
  2017-10-09 15:45   ` Dan Williams
  0 siblings, 1 reply; 4+ messages in thread
From: Johannes Thumshirn @ 2017-10-09  8:07 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-nvdimm

On Sat, Oct 07, 2017 at 08:14:42AM -0700, Dan Williams wrote:
[...]

> +rxe_cfg stop
> +rxe_cfg start
> +if ! rxe_cfg status | grep -n rxe0; then
> +	rxe_cfg add eth0
> +fi

Can we maybe skip the dependency on rxe_cfg? All that is needed is modprobe
and echo. Also hard coding eth0 might be problematic in this case. This works
on your test-setup but surely isn't portable.

Byte,
	Johannes

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ndctl PATCH] ndctl, test: rdma vs dax
  2017-10-09  8:07 ` Johannes Thumshirn
@ 2017-10-09 15:45   ` Dan Williams
  2017-10-10  7:14     ` Johannes Thumshirn
  0 siblings, 1 reply; 4+ messages in thread
From: Dan Williams @ 2017-10-09 15:45 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: linux-nvdimm

On Mon, Oct 9, 2017 at 1:07 AM, Johannes Thumshirn <jthumshirn@suse.de> wrote:
> On Sat, Oct 07, 2017 at 08:14:42AM -0700, Dan Williams wrote:
> [...]
>
>> +rxe_cfg stop
>> +rxe_cfg start
>> +if ! rxe_cfg status | grep -n rxe0; then
>> +     rxe_cfg add eth0
>> +fi
>
> Can we maybe skip the dependency on rxe_cfg? All that is needed is modprobe
> and echo.

Sure, I'll take a look.

> Also hard coding eth0 might be problematic in this case. This works
> on your test-setup but surely isn't portable.

Yes,  which is part of the reason I have this listed under the
"destructive" tests. Any advice on how to make it portable would be
appreciated.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [ndctl PATCH] ndctl, test: rdma vs dax
  2017-10-09 15:45   ` Dan Williams
@ 2017-10-10  7:14     ` Johannes Thumshirn
  0 siblings, 0 replies; 4+ messages in thread
From: Johannes Thumshirn @ 2017-10-10  7:14 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-nvdimm

On Mon, Oct 09, 2017 at 08:45:41AM -0700, Dan Williams wrote:
> On Mon, Oct 9, 2017 at 1:07 AM, Johannes Thumshirn <jthumshirn@suse.de> wrote:
> > On Sat, Oct 07, 2017 at 08:14:42AM -0700, Dan Williams wrote:
> > [...]
> >
> >> +rxe_cfg stop
> >> +rxe_cfg start
> >> +if ! rxe_cfg status | grep -n rxe0; then
> >> +     rxe_cfg add eth0
> >> +fi
> >
> > Can we maybe skip the dependency on rxe_cfg? All that is needed is modprobe
> > and echo.
> 
> Sure, I'll take a look.

For my NVMe over Soft-RoCE test setup with Rapido [1] I used the following:

modprobe rdma-rxe
echo eth0 > /sys/module/rdma_rxe/parameters/add

> 
> > Also hard coding eth0 might be problematic in this case. This works
> > on your test-setup but surely isn't portable.
> 
> Yes,  which is part of the reason I have this listed under the
> "destructive" tests. Any advice on how to make it portable would be
> appreciated.

Maybe:
ETH=${ETH:-eth0}
echo $ETH > /sys/module/rdma_rxe/parameters/add

Byte,
	Johannes

[1] https://github.com/rapido-linux/rapido/blob/master/nvme_rdma_autorun.sh#L74

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-10-10  7:10 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-07 15:14 [ndctl PATCH] ndctl, test: rdma vs dax Dan Williams
2017-10-09  8:07 ` Johannes Thumshirn
2017-10-09 15:45   ` Dan Williams
2017-10-10  7:14     ` Johannes Thumshirn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).