All of lore.kernel.org
 help / color / mirror / Atom feed
From: kaike.wan@intel.com
To: dledford@redhat.com, jgg@nvidia.com
Cc: linux-rdma@vger.kernel.org, todd.rimmer@intel.com,
	Kaike Wan <kaike.wan@intel.com>
Subject: [PATCH RFC 3/9] RDMA/rv: Add the rv module
Date: Fri, 19 Mar 2021 08:56:29 -0400	[thread overview]
Message-ID: <20210319125635.34492-4-kaike.wan@intel.com> (raw)
In-Reply-To: <20210319125635.34492-1-kaike.wan@intel.com>

From: Kaike Wan <kaike.wan@intel.com>

Add the rv module, the Makefile, and Kconfig file.

Also add the functions to manage IB devices.

Signed-off-by: Todd Rimmer <todd.rimmer@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
---
 MAINTAINERS                           |   6 +
 drivers/infiniband/Kconfig            |   1 +
 drivers/infiniband/ulp/Makefile       |   1 +
 drivers/infiniband/ulp/rv/Kconfig     |  11 ++
 drivers/infiniband/ulp/rv/Makefile    |   9 +
 drivers/infiniband/ulp/rv/rv_main.c   | 266 ++++++++++++++++++++++++++
 drivers/infiniband/ulp/rv/trace.c     |   7 +
 drivers/infiniband/ulp/rv/trace.h     |   5 +
 drivers/infiniband/ulp/rv/trace_dev.h |  82 ++++++++
 9 files changed, 388 insertions(+)
 create mode 100644 drivers/infiniband/ulp/rv/Kconfig
 create mode 100644 drivers/infiniband/ulp/rv/Makefile
 create mode 100644 drivers/infiniband/ulp/rv/rv_main.c
 create mode 100644 drivers/infiniband/ulp/rv/trace.c
 create mode 100644 drivers/infiniband/ulp/rv/trace.h
 create mode 100644 drivers/infiniband/ulp/rv/trace_dev.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d92f85ca831d..ba50affec9bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15547,6 +15547,12 @@ L:	linux-rdma@vger.kernel.org
 S:	Maintained
 F:	drivers/infiniband/ulp/rtrs/
 
+RV DRIVER
+M:	Kaike Wan <kaike.wan@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/ulp/rv
+
 RXRPC SOCKETS (AF_RXRPC)
 M:	David Howells <dhowells@redhat.com>
 L:	linux-afs@lists.infradead.org
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 04a78d9f8fe3..5086164c836f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -107,5 +107,6 @@ source "drivers/infiniband/ulp/isert/Kconfig"
 source "drivers/infiniband/ulp/rtrs/Kconfig"
 
 source "drivers/infiniband/ulp/opa_vnic/Kconfig"
+source "drivers/infiniband/ulp/rv/Kconfig"
 
 endif # INFINIBAND
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index 4d0004b58377..f925deb9241c 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_INFINIBAND_ISER)		+= iser/
 obj-$(CONFIG_INFINIBAND_ISERT)		+= isert/
 obj-$(CONFIG_INFINIBAND_OPA_VNIC)	+= opa_vnic/
 obj-$(CONFIG_INFINIBAND_RTRS)		+= rtrs/
+obj-$(CONFIG_INFINIBAND_RV)		+= rv/
diff --git a/drivers/infiniband/ulp/rv/Kconfig b/drivers/infiniband/ulp/rv/Kconfig
new file mode 100644
index 000000000000..32a0523ff8ce
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+#
+# Copyright(c) 2020 - 2021 Intel Corporation.
+#
+config INFINIBAND_RV
+	tristate "InfiniBand Rendezvous Module"
+	depends on X86_64 && INFINIBAND
+	help
+	  The rendezvous module provides mechanisms for HPC middlewares
+	  to cache memory region registration, to manage connections
+	  between nodes, and improve the scalability of RDMA transactions.
diff --git a/drivers/infiniband/ulp/rv/Makefile b/drivers/infiniband/ulp/rv/Makefile
new file mode 100644
index 000000000000..07a7a7dd9c3b
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+#
+# Copyright(c) 2020 - 2021 Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_RV) += rv.o
+
+rv-y := rv_main.o trace.o
+
+CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/ulp/rv/rv_main.c b/drivers/infiniband/ulp/rv/rv_main.c
new file mode 100644
index 000000000000..7f81f97a01f0
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/rv_main.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 - 2021 Intel Corporation.
+ */
+
+/* This file contains the base of the rendezvous RDMA driver */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/parser.h>
+
+#include <rdma/ib_user_sa.h>
+
+#include "rv.h"
+#include "trace.h"
+
+MODULE_AUTHOR("Kaike Wan");
+MODULE_DESCRIPTION("Rendezvous Module");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int rv_add_one(struct ib_device *device);
+static void rv_remove_one(struct ib_device *device, void *client_data);
+static void rv_rename_dev(struct ib_device *device, void *client_data);
+
+static struct ib_client rv_client = {	/* registered in rv_init_module() */
+	.name = "rv",
+	.add = rv_add_one,		/* allocate and list an rv_device */
+	.remove = rv_remove_one,	/* detach users, drop the rv_device */
+	.rename = rv_rename_dev		/* empty stub */
+};
+
+static struct list_head rv_dev_list;	/* list of rv_device */
+static spinlock_t rv_dev_list_lock;	/* guards rv_dev_list and user_lists */
+
+/* look up an rv_device by IB name; take a ref and queue rv on user_list */
+struct rv_device *rv_device_get_add_user(char *dev_name, struct rv_user *rv)
+{
+	struct rv_device *cur;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rv_dev_list_lock, flags);
+	list_for_each_entry(cur, &rv_dev_list, dev_entry) {
+		if (strcmp(cur->ib_dev->name, dev_name) ||
+		    !kref_get_unless_zero(&cur->kref))
+			continue; /* other name, or device going away */
+		list_add_tail(&rv->user_entry, &cur->user_list);
+		spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+		trace_rv_dev_get(dev_name, kref_read(&cur->kref));
+		return cur;
+	}
+	spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+
+	rv_err(RV_INVALID, "Could not find IB dev %s\n", dev_name);
+	return NULL;
+}
+
+static void rv_device_release(struct kref *kref) /* kref_put() callback */
+{
+	struct rv_device *dev = container_of(kref, struct rv_device, kref);
+
+	ib_unregister_event_handler(&dev->event_handler); /* may need sooner */
+	kfree(dev); /* allocated with kzalloc() in rv_add_one() */
+}
+
+void rv_device_get(struct rv_device *dev)
+{
+	kref_get(&dev->kref); /* balanced by rv_device_put() */
+}
+
+void rv_device_put(struct rv_device *dev)
+{
+	trace_rv_dev_put(dev->ib_dev ? dev->ib_dev->name : "nil",
+			 kref_read(&dev->kref)); /* count before the put */
+	kref_put(&dev->kref, rv_device_release); /* last put frees dev */
+}
+
+/*
+ * Unlink a rv_user from rv_device.user_list
+ *
+ * @rv - The rv_user to unlink
+ *
+ * Return:
+ *   0 - The rv_user was on rv_device.user_list and has been unlinked;
+ *   1 - The rv_user was already off rv_device.user_list.
+ */
+int rv_device_del_user(struct rv_user *rv)
+{
+	unsigned long flags;
+	int already_off;
+
+	spin_lock_irqsave(&rv_dev_list_lock, flags);
+	/* empty user_entry: unlinked before, e.g. by rv_device_detach_users */
+	already_off = list_empty(&rv->user_entry);
+	if (!already_off)
+		list_del_init(&rv->user_entry);
+	spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+
+	return already_off;
+}
+
+/* verbs device level async events: traced here, none is acted upon */
+static void rv_device_event_handler(struct ib_event_handler *handler,
+				    struct ib_event *event)
+{
+	struct rv_device *dev;
+
+	dev = ib_get_client_data(event->device, &rv_client);
+	if (!dev || dev->ib_dev != event->device)
+		return; /* no matching rv_device for the reporting device */
+
+	trace_rv_device_event(dev->ib_dev->name, ib_event_msg(event->event));
+	switch (event->event) { /* every label falls through to the break */
+	case IB_EVENT_DEVICE_FATAL:
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_PORT_ACTIVE:
+	case IB_EVENT_LID_CHANGE:
+	case IB_EVENT_PKEY_CHANGE:
+	case IB_EVENT_SM_CHANGE:
+	case IB_EVENT_CLIENT_REREGISTER:
+	case IB_EVENT_GID_CHANGE:
+	default:
+		break;
+	}
+}
+
+static int rv_add_one(struct ib_device *device) /* ib_client .add callback */
+{
+	struct rv_device *dev;
+	unsigned long flags;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->ib_dev = device;
+	kref_init(&dev->kref); /* initial ref, put in rv_remove_one() */
+	mutex_init(&dev->listener_mutex);
+	spin_lock_init(&dev->listener_lock);
+	INIT_LIST_HEAD(&dev->listener_list);
+	INIT_LIST_HEAD(&dev->user_list);
+	spin_lock_irqsave(&rv_dev_list_lock, flags);
+	list_add(&dev->dev_entry, &rv_dev_list); /* now visible to lookups */
+	spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+	trace_rv_dev_add(device->name, kref_read(&dev->kref));
+	ib_set_client_data(device, &rv_client, dev); /* read by event handler */
+
+	INIT_IB_EVENT_HANDLER(&dev->event_handler, device,
+			      rv_device_event_handler);
+	ib_register_event_handler(&dev->event_handler); /* undone on release */
+
+	return 0;
+}
+
+/*
+ * Called on device removal, gets users off the device
+ *
+ * At the same time, applications will get device async events which should
+ * trigger them to start user space cleanup and close.
+ *
+ * We remove the rv_user from the user_list so that the user application knows
+ * that the remove_one handler is cleaning up this rv_user. After this,
+ * the rv->user_entry itself is an empty list, an indicator that the
+ * remove_one handler owns this rv_user.
+ *
+ * To comply with lock hierarchy, we must release rv_dev_list_lock so
+ * rv_detach_user can get rv->mutex.  The empty rv->user_entry will prevent
+ * a race with rv_user starting its own detach.
+ */
+static void rv_device_detach_users(struct rv_device *dev)
+{
+	unsigned long flags;
+	struct rv_user *rv;
+
+	spin_lock_irqsave(&rv_dev_list_lock, flags);
+	while (!list_empty(&dev->user_list)) {
+		rv = list_first_entry(&dev->user_list, struct rv_user,
+				      user_entry);
+		list_del_init(&rv->user_entry); /* leaves user_entry empty */
+
+		spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+		/* Detach user here (placeholder: no detach call is made yet) */
+		spin_lock_irqsave(&rv_dev_list_lock, flags);
+	}
+	spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+}
+
+/*
+ * device removal handler
+ *
+ * We allow a wait_time of 2 seconds for applications to clean up and close;
+ * typically they get an async event and react quickly.  After that we
+ * forcibly detach the remaining users and wait for the internal references
+ * to be released by their callbacks.  The sleeps are uninterruptible so a
+ * pending signal cannot turn the polling loops into busy-waits.
+ */
+static void rv_remove_one(struct ib_device *device, void *client_data)
+{
+	struct rv_device *dev = client_data;
+	unsigned long wait_time = 2000; /* 2 seconds */
+	unsigned long sleep_time = msecs_to_jiffies(100);
+	unsigned long flags, end;
+
+	trace_rv_dev_remove(device->name, kref_read(&dev->kref));
+	spin_lock_irqsave(&rv_dev_list_lock, flags);
+	list_del(&dev->dev_entry);
+	spin_unlock_irqrestore(&rv_dev_list_lock, flags);
+
+	end = jiffies + msecs_to_jiffies(wait_time);
+	while (time_before(jiffies, end) && !list_empty(&dev->user_list))
+		schedule_timeout_uninterruptible(sleep_time);
+
+	rv_device_detach_users(dev);
+
+	while (kref_read(&dev->kref) > 1)
+		schedule_timeout_uninterruptible(sleep_time);
+
+	rv_device_put(dev);
+}
+
+static void rv_rename_dev(struct ib_device *device, void *client_data)
+{ /* empty: rv_device_get_add_user() compares ib_dev->name directly */
+}
+
+static void rv_init_devices(void)
+{
+	spin_lock_init(&rv_dev_list_lock); /* both are used by rv_add_one() */
+	INIT_LIST_HEAD(&rv_dev_list);
+}
+
+static void rv_deinit_devices(void)
+{
+	struct rv_device *dev, *temp;
+	LIST_HEAD(stale);
+
+	synchronize_rcu(); /* ensure prior kfree_rcu() of references are done */
+	spin_lock_irq(&rv_dev_list_lock); /* module exit: process context */
+	list_splice_init(&rv_dev_list, &stale);
+	spin_unlock_irq(&rv_dev_list_lock);
+	list_for_each_entry_safe(dev, temp, &stale, dev_entry) {
+		list_del(&dev->dev_entry);
+		rv_device_put(dev); /* release may sleep: call it unlocked */
+	}
+}
+
+/* module load: set up the device list, then register as an IB client */
+static int __init rv_init_module(void)
+{
+	int ret;
+
+	pr_info("Loading rendezvous module\n");
+	rv_init_devices();
+	ret = ib_register_client(&rv_client);
+	if (ret) /* pass the core's errno up rather than a blanket -EINVAL */
+		rv_err(RV_INVALID, "Failed to register with the IB core\n");
+
+	return ret;
+}
+
+static void __exit rv_cleanup_module(void)
+{
+	ib_unregister_client(&rv_client); /* before the list is torn down */
+	rv_deinit_devices();
+}
+
+module_init(rv_init_module);
+module_exit(rv_cleanup_module);
diff --git a/drivers/infiniband/ulp/rv/trace.c b/drivers/infiniband/ulp/rv/trace.c
new file mode 100644
index 000000000000..b27536056c60
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/trace.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 - 2021 Intel Corporation.
+ */
+#define CREATE_TRACE_POINTS
+#include <rdma/rv_user_ioctls.h>
+#include "trace.h"
diff --git a/drivers/infiniband/ulp/rv/trace.h b/drivers/infiniband/ulp/rv/trace.h
new file mode 100644
index 000000000000..cb1d1d087e16
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/trace.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 - 2021 Intel Corporation.
+ */
+#include "trace_dev.h"
diff --git a/drivers/infiniband/ulp/rv/trace_dev.h b/drivers/infiniband/ulp/rv/trace_dev.h
new file mode 100644
index 000000000000..2bfc6b07d518
--- /dev/null
+++ b/drivers/infiniband/ulp/rv/trace_dev.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 - 2021 Intel Corporation.
+ */
+#if !defined(__RV_TRACE_DEV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RV_TRACE_DEV_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rv_dev
+
+DECLARE_EVENT_CLASS(/* shared layout: device name plus refcount */
+	rv_dev_template,
+	TP_PROTO(const char *dev_name, u32 refcount),
+	TP_ARGS(dev_name, refcount),
+	TP_STRUCT__entry(/* fields stored per record */
+		__string(name, dev_name)
+		__field(u32, refcount)
+	),
+	TP_fast_assign(/* copy the arguments into the record */
+		__assign_str(name, dev_name);
+		__entry->refcount = refcount;
+	),
+	TP_printk(/* text form of one record */
+		"name %s, refcount %u",
+		__get_str(name),
+		__entry->refcount
+	)
+);
+
+DEFINE_EVENT(/* fired by rv_add_one() once the device is listed */
+	rv_dev_template, rv_dev_add,
+	TP_PROTO(const char *dev_name, u32 refcount),
+	TP_ARGS(dev_name, refcount)
+);
+
+DEFINE_EVENT(/* fired on entry to rv_remove_one() */
+	rv_dev_template, rv_dev_remove,
+	TP_PROTO(const char *dev_name, u32 refcount),
+	TP_ARGS(dev_name, refcount)
+);
+
+DEFINE_EVENT(/* fired when rv_device_get_add_user() takes a reference */
+	rv_dev_template, rv_dev_get,
+	TP_PROTO(const char *dev_name, u32 refcount),
+	TP_ARGS(dev_name, refcount)
+);
+
+DEFINE_EVENT(/* fired by rv_device_put() before the reference is dropped */
+	rv_dev_template, rv_dev_put,
+	TP_PROTO(const char *dev_name, u32 refcount),
+	TP_ARGS(dev_name, refcount)
+);
+
+TRACE_EVENT(/* fired per async event in rv_device_event_handler() */
+	rv_device_event,
+	TP_PROTO(const char *dev_name, const char *evt_name),
+	TP_ARGS(dev_name, evt_name),
+	TP_STRUCT__entry(/* fields stored per record */
+		__string(device, dev_name)
+		__string(event, evt_name)
+	),
+	TP_fast_assign(/* copy both strings into the record */
+		__assign_str(device, dev_name);
+		__assign_str(event, evt_name);
+	),
+	TP_printk(/* text form of one record */
+		"Device %s Event %s",
+		__get_str(device),
+		__get_str(event)
+	)
+);
+
+#endif /* __RV_TRACE_DEV_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_dev
+#include <trace/define_trace.h>
-- 
2.18.1


  parent reply	other threads:[~2021-03-19 12:57 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-19 12:56 [PATCH RFC 0/9] A rendezvous module kaike.wan
2021-03-19 12:56 ` [PATCH RFC 1/9] RDMA/rv: Public interferce for the RDMA Rendezvous module kaike.wan
2021-03-19 16:00   ` Jason Gunthorpe
2021-03-19 18:42   ` kernel test robot
2021-03-19 12:56 ` [PATCH RFC 2/9] RDMA/rv: Add the internal header files kaike.wan
2021-03-19 16:02   ` Jason Gunthorpe
2021-03-19 12:56 ` kaike.wan [this message]
2021-03-19 12:56 ` [PATCH RFC 4/9] RDMA/rv: Add functions for memory region cache kaike.wan
2021-03-19 12:56 ` [PATCH RFC 5/9] RDMA/rv: Add function to register/deregister memory region kaike.wan
2021-03-19 12:56 ` [PATCH RFC 6/9] RDMA/rv: Add connection management functions kaike.wan
2021-03-19 12:56 ` [PATCH RFC 7/9] RDMA/rv: Add functions for RDMA transactions kaike.wan
2021-03-19 12:56 ` [PATCH RFC 8/9] RDMA/rv: Add functions for file operations kaike.wan
2021-03-19 12:56 ` [PATCH RFC 9/9] RDMA/rv: Integrate the file operations into the rv module kaike.wan
2021-03-19 13:53 ` [PATCH RFC 0/9] A rendezvous module Jason Gunthorpe
2021-03-19 14:49   ` Wan, Kaike
2021-03-19 15:48     ` Jason Gunthorpe
2021-03-19 19:22       ` Dennis Dalessandro
2021-03-19 19:44         ` Jason Gunthorpe
2021-03-19 20:12           ` Rimmer, Todd
2021-03-19 20:26             ` Jason Gunthorpe
2021-03-19 20:46               ` Rimmer, Todd
2021-03-19 20:54                 ` Jason Gunthorpe
2021-03-19 20:59                   ` Wan, Kaike
2021-03-19 21:28                     ` Dennis Dalessandro
2021-03-19 21:58                       ` Wan, Kaike
2021-03-19 22:35                         ` Jason Gunthorpe
2021-03-19 22:57                       ` Rimmer, Todd
2021-03-19 23:06                         ` Jason Gunthorpe
2021-03-20 16:39                         ` Dennis Dalessandro
2021-03-21  8:56                           ` Leon Romanovsky
2021-03-21 16:24                             ` Dennis Dalessandro
2021-03-21 16:45                               ` Jason Gunthorpe
2021-03-21 17:21                                 ` Dennis Dalessandro
2021-03-21 18:08                                   ` Jason Gunthorpe
2021-03-22 15:17                                     ` Rimmer, Todd
2021-03-22 16:47                                       ` Jason Gunthorpe
2021-03-22 17:31                                     ` Hefty, Sean
2021-03-23 22:56                                       ` Jason Gunthorpe
2021-03-23 23:29                                         ` Rimmer, Todd
2021-03-21 19:19                                   ` Wan, Kaike
2021-03-23 15:36                                   ` Christoph Hellwig
2021-03-23 15:35                                 ` Christoph Hellwig
2021-03-23 15:33                               ` Christoph Hellwig
2021-03-23 15:30                         ` Christoph Hellwig
2021-03-23 15:46                           ` Jason Gunthorpe
2021-03-23 16:07                             ` Christoph Hellwig
2021-03-23 17:25                               ` Rimmer, Todd
2021-03-23 17:44                                 ` Jason Gunthorpe
2021-03-19 20:18           ` Dennis Dalessandro
2021-03-19 20:30             ` Jason Gunthorpe
2021-03-19 20:34       ` Hefty, Sean
2021-03-21 12:08         ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210319125635.34492-4-kaike.wan@intel.com \
    --to=kaike.wan@intel.com \
    --cc=dledford@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=todd.rimmer@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.