All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
To: Martin Petersen <martin.petersen@oracle.com>,
	<target-devel@vger.kernel.org>
Cc: <linux-scsi@vger.kernel.org>, <linux@yadro.com>,
	Dmitry Bogdanov <d.bogdanov@yadro.com>
Subject: [RFC PATCH 31/48] dlm_ckv: introduce DLM cluster key-value storage
Date: Thu, 18 Nov 2021 10:52:33 +0300	[thread overview]
Message-ID: <20220803162857.27770-32-d.bogdanov@yadro.com> (raw)
In-Reply-To: <20220803162857.27770-1-d.bogdanov@yadro.com>

Introduce the first version of the DLM CKV module, which can be used by
different kernel subsystems to share information across a cluster.

This commit implements only the cluster-level locks.

Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
---
 drivers/target/Kconfig   |   6 +
 drivers/target/Makefile  |   2 +
 drivers/target/dlm_ckv.c | 323 +++++++++++++++++++++++++++++++++++++++
 drivers/target/dlm_ckv.h |  19 +++
 4 files changed, 350 insertions(+)
 create mode 100644 drivers/target/dlm_ckv.c
 create mode 100644 drivers/target/dlm_ckv.h

diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 72171ea3dd53..75d5e1d23a1c 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -35,6 +35,12 @@ config TCM_PSCSI
 	Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
 	passthrough access to Linux/SCSI device
 
+config DLM_CKV
+	tristate "Cluster key value storage over DLM"
+	depends on DLM
+	help
+	Say Y here to enable the cluster key value storage over DLM
+
 config TCM_USER2
 	tristate "TCM/USER Subsystem Plugin for Linux"
 	depends on UIO && NET
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 45634747377e..8bc9ac2bd629 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -30,3 +30,5 @@ obj-$(CONFIG_LOOPBACK_TARGET)	+= loopback/
 obj-$(CONFIG_TCM_FC)		+= tcm_fc/
 obj-$(CONFIG_ISCSI_TARGET)	+= iscsi/
 obj-$(CONFIG_SBP_TARGET)	+= sbp/
+
+obj-$(CONFIG_DLM_CKV)			+= dlm_ckv.o
diff --git a/drivers/target/dlm_ckv.c b/drivers/target/dlm_ckv.c
new file mode 100644
index 000000000000..a2e1a191c433
--- /dev/null
+++ b/drivers/target/dlm_ckv.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/completion.h>
+#include <linux/dlm.h>
+#include <linux/dlmconstants.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <target/target_core_base.h>
+#include <asm-generic/errno-base.h>
+#include "dlm_ckv.h"
+
+/*
+ * DLM lock status block paired with a completion.
+ *
+ * A pointer to this structure is passed to dlm_lock()/dlm_unlock() as the
+ * AST argument; dlm_ast() signals @compl and the synchronous helpers in this
+ * file wait on it.
+ */
+struct dlm_ckv_lksb {
+	/* status block handed to dlm_lock()/dlm_unlock() */
+	struct dlm_lksb lksb;
+	/* completed by dlm_ast() */
+	struct completion compl;
+};
+
+/*
+ * A named cluster lock that belongs to a bucket.
+ *
+ * Allocated by dlm_ckv_create_lock() and freed by dlm_ckv_free_lock().
+ */
+struct dlm_ckv_lock {
+	/* owning bucket; a bucket reference is held for the lock's lifetime */
+	struct dlm_ckv_bucket *bucket;
+	/* status block and completion used for lock/unlock requests */
+	struct dlm_ckv_lksb lksb;
+	/* resource name, copied with strscpy() and therefore NUL-terminated */
+	char name[DLM_RESNAME_MAXLEN];
+};
+
+/*
+ * A bucket wraps one DLM lockspace together with the cluster membership
+ * reported by the most recent recover_done callback.
+ */
+struct dlm_ckv_bucket {
+	/* lockspace handle filled in by dlm_new_lockspace() */
+	dlm_lockspace_t *ls;
+	/* one reference for the opener plus one per created lock */
+	struct kref refcount;
+	/* node id of the local node, set in dlm_ckv_recover_done() */
+	u32 local_nodeid;
+	/* slot number of the local node, set in dlm_ckv_recover_done() */
+	u32 local_slotid;
+	/* number of valid entries in nodeid[]; 0 until the first recovery */
+	size_t num_nodes;
+	/* node ids of the lockspace members, set in dlm_ckv_recover_done() */
+	int nodeid[64];
+	/* opaque pointer supplied to dlm_ckv_open_bucket(); only stored here */
+	void *userarg;
+	/* completed by dlm_ckv_recover_done(); awaited by dlm_ckv_open_bucket() */
+	struct completion sync_compl;
+};
+
+
+/* LVB length passed to dlm_new_lockspace() when a bucket is opened. */
+#define DLM_CKV_LVB_SIZE	256
+
+/* Defined further down; declared early because it is the kref release callback. */
+static void bucket_release(struct kref *ref);
+
+/*
+ * Lockspace callback: DLM calls this before it does lock recovery.
+ * This module takes no action at that point; @arg is unused.
+ */
+
+static void dlm_ckv_recover_prep(void *arg)
+{
+
+}
+
+/*
+ * Lockspace callback: DLM calls this after recover_prep has been completed
+ * on all lockspace members, once for a member that has failed.
+ *
+ * @arg:  callback argument registered with the lockspace (unused here).
+ * @slot: slot of the failed member; only its nodeid is logged.
+ */
+
+static void dlm_ckv_recover_slot(void *arg, struct dlm_slot *slot)
+{
+	pr_info("nodeid %d left the cluster\n", slot->nodeid);
+}
+
+/*
+ * Lockspace callback: DLM calls this after recover_slot and after it has
+ * completed lock recovery.
+ *
+ * Records the current cluster membership in the bucket and signals
+ * @sync_compl, which dlm_ckv_open_bucket() waits on.
+ *
+ * @arg:        the struct dlm_ckv_bucket registered with dlm_new_lockspace().
+ * @slots:      array describing the current lockspace members.
+ * @num_slots:  number of entries in @slots.
+ * @our_slot:   slot number of the local node.
+ * @generation: unused.
+ */
+static void dlm_ckv_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
+			      int our_slot, uint32_t generation)
+{
+	struct dlm_ckv_bucket *bucket = arg;
+	int max_nodes = ARRAY_SIZE(bucket->nodeid);
+	int stored = 0;
+	int i;
+
+	for (i = 0; i < num_slots; i++) {
+		if (slots[i].slot == our_slot)
+			bucket->local_nodeid = slots[i].nodeid;
+
+		/* Never write past the end of the fixed-size nodeid[] table. */
+		if (i < max_nodes)
+			bucket->nodeid[stored++] = slots[i].nodeid;
+	}
+
+	if (num_slots > max_nodes)
+		pr_warn("%d cluster nodes reported, only %d are tracked\n",
+			num_slots, max_nodes);
+
+	bucket->local_slotid = our_slot;
+	/* Count only the entries that were really stored. */
+	bucket->num_nodes = stored;
+	complete(&bucket->sync_compl);
+}
+
+/* Recovery callbacks registered with every lockspace opened by this module. */
+static const struct dlm_lockspace_ops dlm_ckv_lockspace_ops = {
+	.recover_prep = dlm_ckv_recover_prep,
+	.recover_slot = dlm_ckv_recover_slot,
+	.recover_done = dlm_ckv_recover_done,
+};
+
+/*
+ * Completion AST shared by all requests issued from this file.
+ * @astarg is the struct dlm_ckv_lksb of the request; its completion is
+ * signalled so that the waiting caller can continue.
+ */
+static void dlm_ast(void *astarg)
+{
+	struct dlm_ckv_lksb *sb = astarg;
+
+	complete(&sb->compl);
+}
+
+/*
+ * dlm_ckv_cancel - Synchronously cancel a pending dlm_lock() operation
+ * @ls:    DLM lockspace the request was issued in.
+ * @lksb:  lock status block of the pending request.
+ * @flags: flags of the original request; only DLM_LKF_VALBLK is forwarded.
+ * @name:  lock name (unused).
+ *
+ * Return: 0 once the cancel request has completed, -ETIMEDOUT if no
+ * completion arrived within 10 seconds, or the negative error returned by
+ * dlm_unlock().
+ */
+static int dlm_ckv_cancel(dlm_lockspace_t *ls, struct dlm_ckv_lksb *lksb,
+			   int flags, const char *name)
+{
+	int res;
+
+	res = dlm_unlock(ls, lksb->lksb.sb_lkid,
+			 DLM_LKF_CANCEL | (flags & DLM_LKF_VALBLK),
+			 &lksb->lksb, lksb);
+	if (res < 0)
+		return res;
+
+	/*
+	 * wait_for_completion_timeout() returns the remaining jiffies, or 0
+	 * on timeout, and never a negative value.  Translate it so that the
+	 * caller's "< 0" check really notices a cancel that timed out.
+	 */
+	if (!wait_for_completion_timeout(&lksb->compl, 10 * HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * dlm_ckv_lock_wait - Wait until a DLM lock has been granted
+ * @ls:     DLM lock space.
+ * @mode:   DLM lock mode.
+ * @lksb:   DLM lock status block.
+ * @flags:  DLM flags.
+ * @name:   DLM lock name. Only required for non-conversion requests.
+ * @bast:   AST to be invoked in case this lock blocks another one.
+ *
+ * If the request fails or does not complete within 60 seconds, an attempt
+ * is made to cancel it.
+ *
+ * Return: the sb_status reported by DLM once the request has completed,
+ * -ETIMEDOUT if it did not complete in time, or the negative error returned
+ * by dlm_lock().
+ */
+static int dlm_ckv_lock_wait(dlm_lockspace_t *ls, int mode,
+				struct dlm_ckv_lksb *lksb, int flags,
+				const char *name, void (*bast)(void *, int))
+{
+	int res;
+
+	/*
+	 * The completion is initialised once per lock object and reused for
+	 * every request.  Drop any stale signal (e.g. from an AST that showed
+	 * up after an earlier wait had already timed out) so that it cannot
+	 * be mistaken for the answer to this request.
+	 */
+	reinit_completion(&lksb->compl);
+
+	res = dlm_lock(ls, mode, &lksb->lksb, flags,
+		       (void *)name, name ? strlen(name) : 0, 0,
+		       dlm_ast, lksb, bast);
+	if (res < 0)
+		goto out;
+	res = wait_for_completion_timeout(&lksb->compl, 60 * HZ);
+	if (res > 0)
+		res = lksb->lksb.sb_status;
+	else if (res == 0)
+		res = -ETIMEDOUT;
+	if (res < 0) {
+		int res2 = dlm_ckv_cancel(ls, lksb, flags, name);
+
+		if (res2 < 0)
+			pr_warn("canceling lock %s / %08x failed: %d\n",
+				name ? : "?", lksb->lksb.sb_lkid, res2);
+	}
+
+out:
+	return res;
+}
+
+/*
+ * dlm_ckv_unlock_wait - Release a DLM lock
+ * @ls:   DLM lockspace the lock lives in.
+ * @lksb: lock status block of the lock to release.
+ *
+ * Return: 0 if the unlock completed with -DLM_EUNLOCK or -DLM_ECANCEL,
+ * any other sb_status as reported by DLM, -ETIMEDOUT if no completion
+ * arrived within 60 seconds, or the negative error returned by dlm_unlock().
+ */
+static int dlm_ckv_unlock_wait(dlm_lockspace_t *ls, struct dlm_ckv_lksb *lksb)
+{
+	int res;
+
+	/*
+	 * The completion is shared with the lock path; clear any stale signal
+	 * so that the wait below can only be satisfied by this unlock request.
+	 */
+	reinit_completion(&lksb->compl);
+
+	res = dlm_unlock(ls, lksb->lksb.sb_lkid, 0, &lksb->lksb, lksb);
+	if (res < 0)
+		return res;
+
+	if (!wait_for_completion_timeout(&lksb->compl, 60 * HZ))
+		return -ETIMEDOUT;
+
+	res = lksb->lksb.sb_status;
+	/* These two status codes report a finished unlock/cancel, not an error. */
+	if (res == -DLM_EUNLOCK || res == -DLM_ECANCEL)
+		res = 0;
+
+	return res;
+}
+
+/*
+ * Set up a freshly allocated lock: bind it to @bucket, record its resource
+ * name (truncated to fit the name buffer) and initialise its completion.
+ */
+static void
+dlm_ckv_lock_init(struct dlm_ckv_lock *ckv_lock,
+		  struct dlm_ckv_bucket *bucket,
+		  const char *name)
+{
+	ckv_lock->bucket = bucket;
+	strscpy(ckv_lock->name, name, sizeof(ckv_lock->name));
+	init_completion(&ckv_lock->lksb.compl);
+}
+
+/*
+ * dlm_ckv_create_lock - allocate a named cluster lock inside a bucket
+ * @bucket: bucket the lock belongs to; a reference is taken on it.
+ * @name:   resource name of the lock.
+ *
+ * The lock is only allocated here, not acquired.
+ *
+ * Return: the new lock, or NULL if the allocation failed.
+ */
+struct dlm_ckv_lock *
+dlm_ckv_create_lock(struct dlm_ckv_bucket *bucket, const char *name)
+{
+	struct dlm_ckv_lock *new_lock = kzalloc(sizeof(*new_lock), GFP_KERNEL);
+
+	if (new_lock) {
+		kref_get(&bucket->refcount);
+		dlm_ckv_lock_init(new_lock, bucket, name);
+	}
+
+	return new_lock;
+}
+EXPORT_SYMBOL(dlm_ckv_create_lock);
+
+/*
+ * dlm_ckv_free_lock - free a lock created by dlm_ckv_create_lock()
+ * @ckv_lock: lock to free.
+ *
+ * Also drops the bucket reference that was taken when the lock was created.
+ */
+void
+dlm_ckv_free_lock(struct dlm_ckv_lock *ckv_lock)
+{
+	/* Remember the owner first: ckv_lock must not be touched after kfree(). */
+	struct dlm_ckv_bucket *owner = ckv_lock->bucket;
+
+	kfree(ckv_lock);
+	kref_put(&owner->refcount, bucket_release);
+}
+EXPORT_SYMBOL(dlm_ckv_free_lock);
+
+/*
+ * dlm_ckv_lock_get - acquire the lock in exclusive (DLM_LOCK_EX) mode
+ * @ckv_lock: lock to acquire; must not be NULL.
+ *
+ * Blocks until the request completes or times out.
+ *
+ * Return: the result of dlm_ckv_lock_wait().
+ */
+int
+dlm_ckv_lock_get(struct dlm_ckv_lock *ckv_lock)
+{
+	BUG_ON(!ckv_lock);
+
+	return dlm_ckv_lock_wait(ckv_lock->bucket->ls, DLM_LOCK_EX,
+				 &ckv_lock->lksb, 0, ckv_lock->name, NULL);
+}
+EXPORT_SYMBOL(dlm_ckv_lock_get);
+
+/*
+ * dlm_ckv_lock_release - release a lock taken with dlm_ckv_lock_get()
+ * @ckv_lock: lock to release; must not be NULL.
+ *
+ * Return: the result of dlm_ckv_unlock_wait().
+ */
+int
+dlm_ckv_lock_release(struct dlm_ckv_lock *ckv_lock)
+{
+	BUG_ON(!ckv_lock);
+
+	return dlm_ckv_unlock_wait(ckv_lock->bucket->ls, &ckv_lock->lksb);
+}
+EXPORT_SYMBOL(dlm_ckv_lock_release);
+
+
+/*
+ * kref release callback: runs when the last reference to a bucket is
+ * dropped.  Releases the lockspace (force argument 2) and frees the bucket;
+ * a failure to release the lockspace is only logged.
+ */
+static void bucket_release(struct kref *ref)
+{
+	struct dlm_ckv_bucket *b;
+	int rc;
+
+	b = container_of(ref, struct dlm_ckv_bucket, refcount);
+
+	rc = dlm_release_lockspace(b->ls, 2);
+	if (rc != 0)
+		pr_err("forcibly releasing lockspace failed: %d\n", rc);
+
+	kfree(b);
+}
+
+/**
+ * dlm_ckv_open_bucket - join a DLM lockspace and wrap it in a bucket
+ * @name:         lockspace name, at most DLM_LOCKSPACE_LEN characters.
+ * @cluster_name: cluster name, passed through to dlm_new_lockspace().
+ * @userarg:      opaque pointer stored in the bucket.
+ *
+ * Creates the lockspace and then waits up to 10 seconds for the first
+ * recover_done callback to report the cluster membership.
+ *
+ * Return: the new bucket holding one reference (to be dropped with
+ * dlm_ckv_close_bucket()), ERR_PTR(-EINVAL) if @name is NULL or too long,
+ * or NULL on any other failure.
+ *
+ * NOTE(review): the mix of ERR_PTR() and NULL failure values is kept as is
+ * so that existing callers keep working; they have to check for both.
+ */
+struct dlm_ckv_bucket *
+dlm_ckv_open_bucket(const char *name, const char *cluster_name, void *userarg)
+{
+	struct dlm_ckv_bucket *bucket;
+	int ops_result;
+	int err;
+
+	/* Validate @name before measuring it: strlen(NULL) would oops. */
+	if (!name)
+		return ERR_PTR(-EINVAL);
+
+	if (strlen(name) > DLM_LOCKSPACE_LEN)
+		return ERR_PTR(-EINVAL);
+
+	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
+	if (!bucket)
+		return NULL;
+
+	kref_init(&bucket->refcount);
+	bucket->userarg = userarg;
+	init_completion(&bucket->sync_compl);
+
+	err = dlm_new_lockspace(name, cluster_name,
+				DLM_LSFL_FS | DLM_LSFL_NEWEXCL, DLM_CKV_LVB_SIZE,
+				&dlm_ckv_lockspace_ops, bucket, &ops_result,
+				&bucket->ls);
+	if (err) {
+		pr_err("dlm_new_lockspace error %d\n", err);
+		goto fail_free;
+	}
+
+	if (ops_result < 0) {
+		pr_err("dlm does not support ops callbacks\n");
+		/* The lockspace exists at this point, so it must be released. */
+		goto fail_release;
+	}
+
+	/*
+	 * recover_done fills in num_nodes before it signals sync_compl, so a
+	 * value of 0 after the wait means that no membership was reported.
+	 */
+	wait_for_completion_timeout(&bucket->sync_compl, 10 * HZ);
+	if (bucket->num_nodes == 0) {
+		pr_err("Cluster joining timed out\n");
+		goto fail_release;
+	}
+
+	return bucket;
+
+fail_release:
+	dlm_release_lockspace(bucket->ls, 2);
+fail_free:
+	kfree(bucket);
+
+	return NULL;
+}
+EXPORT_SYMBOL(dlm_ckv_open_bucket);
+
+/*
+ * dlm_ckv_close_bucket - drop one reference to a bucket
+ * @bucket: bucket to close.
+ *
+ * The lockspace is released and the bucket freed by bucket_release() once
+ * the last reference (including those held by locks) is gone.
+ *
+ * Return: always 0.
+ */
+int dlm_ckv_close_bucket(struct dlm_ckv_bucket *bucket)
+{
+	kref_put(&bucket->refcount, bucket_release);
+
+	return 0;
+}
+EXPORT_SYMBOL(dlm_ckv_close_bucket);
+
+/* Module entry point: no global state to set up, always returns 0. */
+static int __init dlm_ckv_module_init(void)
+{
+	return 0;
+}
+
+/* Module exit point: performs no work. */
+static void __exit dlm_ckv_module_exit(void)
+{
+
+}
+
+MODULE_DESCRIPTION("Cluster KV storage over DLM");
+MODULE_AUTHOR("Dmitry Bogdanov <d.bogdanov@yadro.com>");
+MODULE_LICENSE("GPL");
+
+module_init(dlm_ckv_module_init);
+module_exit(dlm_ckv_module_exit);
diff --git a/drivers/target/dlm_ckv.h b/drivers/target/dlm_ckv.h
new file mode 100644
index 000000000000..1a3f79e42bf6
--- /dev/null
+++ b/drivers/target/dlm_ckv.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DLM_CKV_H
+#define DLM_CKV_H
+
+struct dlm_ckv_bucket;
+struct dlm_ckv_lock;
+
+/*
+ * Open a bucket backed by the DLM lockspace @name in cluster @cluster_name.
+ * @userarg is stored in the bucket. On failure either an ERR_PTR() value or
+ * NULL is returned, so callers have to check for both.
+ */
+struct dlm_ckv_bucket *dlm_ckv_open_bucket(const char *name,
+					   const char *cluster_name,
+					   void *userarg);
+/* Drop the opener's reference to @bucket; always returns 0. */
+int dlm_ckv_close_bucket(struct dlm_ckv_bucket *bucket);
+
+/*
+ * Allocate a lock named @name inside @bucket (takes a bucket reference).
+ * Returns NULL if the allocation fails. The lock is not acquired.
+ */
+struct dlm_ckv_lock *
+dlm_ckv_create_lock(struct dlm_ckv_bucket *bucket, const char *name);
+/* Free @ckv_lock and drop its bucket reference. */
+void dlm_ckv_free_lock(struct dlm_ckv_lock *ckv_lock);
+/* Acquire @ckv_lock in exclusive mode, waiting for the request to complete. */
+int dlm_ckv_lock_get(struct dlm_ckv_lock *ckv_lock);
+/* Release @ckv_lock, waiting for the unlock to complete. */
+int dlm_ckv_lock_release(struct dlm_ckv_lock *ckv_lock);
+
+#endif
-- 
2.25.1


  parent reply	other threads:[~2022-08-03 16:30 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-03 16:04 [RFC PATCH 00/48] Target cluster implementation over DLM Dmitry Bogdanov
2019-12-21  0:53 ` [RFC PATCH 07/48] scsi: target/core: Add common port attributes Dmitry Bogdanov
2019-12-21 23:45 ` [RFC PATCH 05/48] scsi: target/core: Use RTPI from target port Dmitry Bogdanov
2019-12-21 23:49 ` [RFC PATCH 06/48] scsi: target/core: Drop device-based RTPI Dmitry Bogdanov
2020-04-04 10:48 ` [RFC PATCH 01/48] scsi: target/core: Add a way to hide a port group Dmitry Bogdanov
2020-04-20  0:18 ` [RFC PATCH 02/48] scsi: target/core: Set MULTIP bit for se_device with multiple ports Dmitry Bogdanov
2020-04-20 17:20 ` [RFC PATCH 03/48] scsi: target/core: Add cleanup sequence in core_tpg_register() Dmitry Bogdanov
2020-04-20 17:57 ` [RFC PATCH 04/48] scsi: target/core: Add RTPI field to target port Dmitry Bogdanov
2020-04-21 14:00 ` [RFC PATCH 08/48] scsi: target/core: Add RTPI attribute for " Dmitry Bogdanov
2020-04-30 14:16 ` [RFC PATCH 10/48] scsi: target/core: Unlock PR generation bump Dmitry Bogdanov
2021-02-25 12:18 ` [RFC PATCH 09/48] target: core: check RTPI uniquity for enabled TPG Dmitry Bogdanov
2021-11-17 11:12 ` [RFC PATCH 36/48] target: cluster: introduce cluster ops Dmitry Bogdanov
2021-11-18  7:52 ` Dmitry Bogdanov [this message]
2021-11-22 17:07 ` [RFC PATCH 32/48] dlm_ckv: add notification service Dmitry Bogdanov
2021-11-22 17:12 ` [RFC PATCH 33/48] dlm_ckv: add key-value storage service Dmitry Bogdanov
2021-11-29  9:00 ` [RFC PATCH 38/48] target: cluster: store PR data in DLM cluster Dmitry Bogdanov
2021-12-01 15:42 ` [RFC PATCH 15/48] target: core: remove unused variable in se_dev_entry Dmitry Bogdanov
2021-12-06 10:56 ` [RFC PATCH 14/48] target: core: new key must be used for moved PR Dmitry Bogdanov
2021-12-06 13:39 ` [RFC PATCH 39/48] target: cluster: read PR data from cluster Dmitry Bogdanov
2021-12-07  9:47 ` [RFC PATCH 35/48] target: add virtual remote target Dmitry Bogdanov
2021-12-10 12:43 ` [RFC PATCH 17/48] target: core: make some functions public Dmitry Bogdanov
2021-12-13 18:58 ` [RFC PATCH 18/48] target: core: proper clear reservation on LUN RESET Dmitry Bogdanov
2021-12-13 19:15 ` [RFC PATCH 19/48] target: core: remove superfluous checks Dmitry Bogdanov
2021-12-13 19:20 ` [RFC PATCH 20/48] target: core: proper check of SCSI-2 reservation Dmitry Bogdanov
2021-12-13 19:28 ` [RFC PATCH 21/48] target: core: checks against peer node SCSI2 reservation Dmitry Bogdanov
2021-12-16 10:20 ` [RFC PATCH 42/48] target: cluster: sync SPC-2 reservations Dmitry Bogdanov
2021-12-17  9:27 ` [RFC PATCH 41/48] target: cluster: sync-up PR data on cluster join Dmitry Bogdanov
2021-12-22 12:38 ` [RFC PATCH 34/48] dlm_ckv: add KV get/set async API Dmitry Bogdanov
2021-12-24  9:45 ` [RFC PATCH 16/48] target: core: undepend PR registrant of nacl Dmitry Bogdanov
2021-12-24  9:52 ` [RFC PATCH 40/48] target: cluster: sync PR for dynamic acls Dmitry Bogdanov
2022-02-28 10:37 ` [RFC PATCH 43/48] target: cluster: allocate UAs on PR sync Dmitry Bogdanov
2022-03-02  7:13 ` [RFC PATCH 12/48] target: core: fix memory leak in preempt_and_abort Dmitry Bogdanov
2022-03-04 10:38 ` [RFC PATCH 11/48] target: core: fix preempt and abort for allreg res Dmitry Bogdanov
2022-03-04 10:44 ` [RFC PATCH 13/48] target: core: abort all preempted regs if requested Dmitry Bogdanov
2022-03-04 12:35 ` [RFC PATCH 44/48] target: cluster: support PR OUT preempt and abort Dmitry Bogdanov
2022-03-11  8:11 ` [RFC PATCH 22/48] target: core: UA on all luns after reset Dmitry Bogdanov
2022-03-11  8:33 ` [RFC PATCH 23/48] target: core: refactor LUN_RESET code Dmitry Bogdanov
2022-03-11  8:40 ` [RFC PATCH 45/48] target: cluster: add reset cluster function Dmitry Bogdanov
2022-03-11  9:30 ` [RFC PATCH 46/48] target: cluster: implement LUN reset in DLM cluster Dmitry Bogdanov
2022-03-25  6:35 ` [RFC PATCH 47/48] target: cluster: split cluster sync function Dmitry Bogdanov
2022-03-25  7:58 ` [RFC PATCH 48/48] target: cluster: request data on initial sync Dmitry Bogdanov
2022-06-21 13:05 ` [RFC PATCH 24/48] target: core: pr: use RTPI in APTPL Dmitry Bogdanov
2022-06-22 10:25 ` [RFC PATCH 26/48] target: core: pr: remove se_tpg from pr_reg Dmitry Bogdanov
2022-06-29  7:50 ` [RFC PATCH 27/48] target: core: fix parsing PR OUT TID Dmitry Bogdanov
2022-06-29 10:06 ` [RFC PATCH 28/48] target: core: add function to compare TransportID Dmitry Bogdanov
2022-06-30 11:11 ` [RFC PATCH 29/48] target: core: store proto_id in APTPL Dmitry Bogdanov
2022-07-01  7:26 ` [RFC PATCH 30/48] target: core: rethink APTPL registrations Dmitry Bogdanov
2022-07-22 14:59 ` [RFC PATCH 37/48] target: cluster: introduce dlm cluster Dmitry Bogdanov
2022-07-27 16:21 ` [RFC PATCH 25/48] target: core: pr: have Transport ID stored Dmitry Bogdanov
2022-08-03 17:36 ` [RFC PATCH 00/48] Target cluster implementation over DLM Mike Christie
2022-08-04 11:01   ` Dmitry Bogdanov
2022-08-05 20:01 [RFC PATCH 27/48] target: core: fix parsing PR OUT TID kernel test robot
2022-08-08 10:31 ` Dan Carpenter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220803162857.27770-32-d.bogdanov@yadro.com \
    --to=d.bogdanov@yadro.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=linux@yadro.com \
    --cc=martin.petersen@oracle.com \
    --cc=target-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.