linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dongsheng Yang <dongsheng.yang@easystack.cn>
To: dan.j.williams@intel.com, axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-cxl@vger.kernel.org,
	Dongsheng Yang <dongsheng.yang.linux@gmail.com>
Subject: [PATCH 1/7] block: Init for CBD(CXL Block Device)
Date: Mon, 22 Apr 2024 07:16:00 +0000	[thread overview]
Message-ID: <20240422071606.52637-2-dongsheng.yang@easystack.cn> (raw)
In-Reply-To: <20240422071606.52637-1-dongsheng.yang@easystack.cn>

From: Dongsheng Yang <dongsheng.yang.linux@gmail.com>

As shared memory is supported in CXL3.0 spec, we can transfer data
via CXL shared memory. CBD means CXL block device, it use CXL shared memory
to transfer command and data to access block device in different
host, as shown below:

┌───────────────────────────────┐                               ┌────────────────────────────────────┐
│          node-1               │                               │              node-2                │
├───────────────────────────────┤                               ├────────────────────────────────────┤
│                               │                               │                                    │
│                       ┌───────┤                               ├─────────┐                          │
│                       │ cbd0  │                               │ backend0├──────────────────┐       │
│                       ├───────┤                               ├─────────┤                  │       │
│                       │ pmem0 │                               │ pmem0   │                  ▼       │
│               ┌───────┴───────┤                               ├─────────┴────┐     ┌───────────────┤
│               │    cxl driver │                               │ cxl driver   │     │ /dev/sda      │
└───────────────┴────────┬──────┘                               └─────┬────────┴─────┴───────────────┘
                         │                                            │
                         │                                            │
                         │        CXL                         CXL     │
                         └────────────────┐               ┌───────────┘
                                          │               │
                                          │               │
                                          │               │
                                      ┌───┴───────────────┴────---------------─┐
                                      │   shared memory device(cbd transport)  │
                                      └──────────────────────---------------───┘

any read/write to cbd0 on node-1 will be transferred to node-2 /dev/sda. It works similar with
nbd (network block device), but it transfer data via CXL shared memory rather than network.

Signed-off-by: Dongsheng Yang <dongsheng.yang.linux@gmail.com>
---
 drivers/block/Kconfig            |   2 +
 drivers/block/Makefile           |   2 +
 drivers/block/cbd/Kconfig        |   4 +
 drivers/block/cbd/Makefile       |   3 +
 drivers/block/cbd/cbd_internal.h | 830 +++++++++++++++++++++++++++++++
 drivers/block/cbd/cbd_main.c     | 216 ++++++++
 6 files changed, 1057 insertions(+)
 create mode 100644 drivers/block/cbd/Kconfig
 create mode 100644 drivers/block/cbd/Makefile
 create mode 100644 drivers/block/cbd/cbd_internal.h
 create mode 100644 drivers/block/cbd/cbd_main.c

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5b9d4aaebb81..1f6376828af9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -219,6 +219,8 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+source "drivers/block/cbd/Kconfig"
+
 config BLK_DEV_RAM
 	tristate "RAM block device support"
 	help
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..8be2a39f5a7c 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -39,4 +39,6 @@ obj-$(CONFIG_BLK_DEV_NULL_BLK)	+= null_blk/
 
 obj-$(CONFIG_BLK_DEV_UBLK)			+= ublk_drv.o
 
+obj-$(CONFIG_BLK_DEV_CBD)	+= cbd/
+
 swim_mod-y	:= swim.o swim_asm.o
diff --git a/drivers/block/cbd/Kconfig b/drivers/block/cbd/Kconfig
new file mode 100644
index 000000000000..98b2cbcdf895
--- /dev/null
+++ b/drivers/block/cbd/Kconfig
@@ -0,0 +1,4 @@
+config BLK_DEV_CBD
+	tristate "CXL Block Device"
+	help
+	  If unsure say 'm'.
diff --git a/drivers/block/cbd/Makefile b/drivers/block/cbd/Makefile
new file mode 100644
index 000000000000..2765325486a2
--- /dev/null
+++ b/drivers/block/cbd/Makefile
@@ -0,0 +1,3 @@
+cbd-y := cbd_main.o
+
+obj-$(CONFIG_BLK_DEV_CBD) += cbd.o
diff --git a/drivers/block/cbd/cbd_internal.h b/drivers/block/cbd/cbd_internal.h
new file mode 100644
index 000000000000..7d9bf5b1c70d
--- /dev/null
+++ b/drivers/block/cbd/cbd_internal.h
@@ -0,0 +1,830 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CBD_INTERNAL_H
+#define _CBD_INTERNAL_H
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/blk-mq.h>
+#include <asm/byteorder.h>
+#include <asm/types.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/dax.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/idr.h>
+#include <linux/workqueue.h>
+#include <linux/uuid.h>
+#include <linux/bitfield.h>
+
+/*
+ * As shared memory is supported in CXL3.0 spec, we can transfer data via CXL shared memory.
+ * CBD means CXL block device, it use CXL shared memory to transport command and data to
+ * access block device in different host, as shown below:
+ *
+ *    ┌───────────────────────────────┐                               ┌────────────────────────────────────┐
+ *    │          node-1               │                               │              node-2                │
+ *    ├───────────────────────────────┤                               ├────────────────────────────────────┤
+ *    │                               │                               │                                    │
+ *    │                       ┌───────┤                               ├─────────┐                          │
+ *    │                       │ cbd0  │                               │ backend0├──────────────────┐       │
+ *    │                       ├───────┤                               ├─────────┤                  │       │
+ *    │                       │ pmem0 │                               │ pmem0   │                  ▼       │
+ *    │               ┌───────┴───────┤                               ├─────────┴────┐     ┌───────────────┤
+ *    │               │    cxl driver │                               │ cxl driver   │     │  /dev/sda     │
+ *    └───────────────┴────────┬──────┘                               └─────┬────────┴─────┴───────────────┘
+ *                             │                                            │
+ *                             │                                            │
+ *                             │        CXL                         CXL     │
+ *                             └────────────────┐               ┌───────────┘
+ *                                              │               │
+ *                                              │               │
+ *                                              │               │
+ *                                          ┌───┴───────────────┴─────┐
+ *                                          │   shared memory device  │
+ *                                          └─────────────────────────┘
+ *
+ * any read/write to cbd0 on node-1 will be transferred to node-2 /dev/sda. It works similar with
+ * nbd (network block device), but it transfer data via CXL shared memory rather than network.
+ */
+
+/* printk */
+#define cbd_err(fmt, ...)							\
+	pr_err("cbd: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define cbd_info(fmt, ...)							\
+	pr_info("cbd: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#define cbd_debug(fmt, ...)							\
+	pr_debug("cbd: %s:%u " fmt, __func__, __LINE__, ##__VA_ARGS__)
+
+#define cbdt_err(transport, fmt, ...)						\
+	cbd_err("cbd_transport%u: " fmt,					\
+		 transport->id, ##__VA_ARGS__)
+#define cbdt_info(transport, fmt, ...)						\
+	cbd_info("cbd_transport%u: " fmt,					\
+		 transport->id, ##__VA_ARGS__)
+#define cbdt_debug(transport, fmt, ...)						\
+	cbd_debug("cbd_transport%u: " fmt,					\
+		 transport->id, ##__VA_ARGS__)
+
+#define cbd_backend_err(backend, fmt, ...)					\
+	cbdt_err(backend->cbdt, "backend%d: " fmt,				\
+		 backend->backend_id, ##__VA_ARGS__)
+#define cbd_backend_info(backend, fmt, ...)					\
+	cbdt_info(backend->cbdt, "backend%d: " fmt,				\
+		 backend->backend_id, ##__VA_ARGS__)
+#define cbd_backend_debug(backend, fmt, ...)					\
+	cbdt_debug(backend->cbdt, "backend%d: " fmt,				\
+		 backend->backend_id, ##__VA_ARGS__)
+
+#define cbd_handler_err(handler, fmt, ...)					\
+	cbd_backend_err(handler->cbdb, "handler%d: " fmt,			\
+		 handler->channel.channel_id, ##__VA_ARGS__)
+#define cbd_handler_info(handler, fmt, ...)					\
+	cbd_backend_info(handler->cbdb, "handler%d: " fmt,			\
+		 handler->channel.channel_id, ##__VA_ARGS__)
+#define cbd_handler_debug(handler, fmt, ...)					\
+	cbd_backend_debug(handler->cbdb, "handler%d: " fmt,			\
+		 handler->channel.channel_id, ##__VA_ARGS__)
+
+#define cbd_blk_err(dev, fmt, ...)						\
+	cbdt_err(dev->cbdt, "cbd%d: " fmt,					\
+		 dev->mapped_id, ##__VA_ARGS__)
+#define cbd_blk_info(dev, fmt, ...)						\
+	cbdt_info(dev->cbdt, "cbd%d: " fmt,					\
+		 dev->mapped_id, ##__VA_ARGS__)
+#define cbd_blk_debug(dev, fmt, ...)						\
+	cbdt_debug(dev->cbdt, "cbd%d: " fmt,					\
+		 dev->mapped_id, ##__VA_ARGS__)
+
+#define cbd_queue_err(queue, fmt, ...)						\
+	cbd_blk_err(queue->cbd_blkdev, "queue-%d: " fmt,			\
+		     queue->index, ##__VA_ARGS__)
+#define cbd_queue_info(queue, fmt, ...)						\
+	cbd_blk_info(queue->cbd_blkdev, "queue-%d: " fmt,			\
+		     queue->index, ##__VA_ARGS__)
+#define cbd_queue_debug(queue, fmt, ...)					\
+	cbd_blk_debug(queue->cbd_blkdev, "queue-%d: " fmt,			\
+		     queue->index, ##__VA_ARGS__)
+
+#define cbd_channel_err(channel, fmt, ...)					\
+	cbdt_err(channel->cbdt, "channel%d: " fmt,				\
+		 channel->channel_id, ##__VA_ARGS__)
+#define cbd_channel_info(channel, fmt, ...)					\
+	cbdt_info(channel->cbdt, "channel%d: " fmt,				\
+		 channel->channel_id, ##__VA_ARGS__)
+#define cbd_channel_debug(channel, fmt, ...)					\
+	cbdt_debug(channel->cbdt, "channel%d: " fmt,				\
+		 channel->channel_id, ##__VA_ARGS__)
+
+#define CBD_PAGE_SHIFT		12
+#define CBD_PAGE_SIZE		(1 << CBD_PAGE_SHIFT)
+#define CBD_PAGE_MASK		(CBD_PAGE_SIZE - 1)
+
+#define CBD_TRANSPORT_MAX	1024
+#define CBD_PATH_LEN	512
+#define CBD_NAME_LEN	32
+
+/* TODO support multi queue */
+#define CBD_QUEUES_MAX		1
+
+#define CBD_PART_SHIFT 4
+#define CBD_DRV_NAME "cbd"
+#define CBD_DEV_NAME_LEN 32
+
+#define CBD_HB_INTERVAL		msecs_to_jiffies(5000) /* 5s */
+#define CBD_HB_TIMEOUT		(30 * 1000) /* 30s */
+
+/*
+ * CBD transport layout:
+ *
+ *      +-------------------------------------------------------------------------------------------------------------------------------+
+ *      |                           cbd transport                                                                                       |
+ *      +--------------------+-----------------------+-----------------------+----------------------+-----------------------------------+
+ *      |                    |       hosts           |      backends         |       blkdevs        |        channels                   |
+ *      | cbd transport info +----+----+----+--------+----+----+----+--------+----+----+----+-------+-------+-------+-------+-----------+
+ *      |                    |    |    |    |  ...   |    |    |    |  ...   |    |    |    |  ...  |       |       |       |   ...     |
+ *      +--------------------+----+----+----+--------+----+----+----+--------+----+----+----+-------+---+---+-------+-------+-----------+
+ *                                                                                                      |
+ *                                                                                                      |
+ *                                                                                                      |
+ *                                                                                                      |
+ *                +-------------------------------------------------------------------------------------+
+ *                |
+ *                |
+ *                v
+ *          +-----------------------------------------------------------+
+ *          |                     channel                               |
+ *          +--------------------+--------------------------------------+
+ *          |    channel meta    |              channel data            |
+ *          +---------+----------+--------------------------------------+
+ *                    |
+ *                    |
+ *                    |
+ *                    v
+ *          +----------------------------------------------------------+
+ *          |                 channel meta                             |
+ *          +-----------+--------------+-------------------------------+
+ *          | meta ctrl |  comp ring   |       cmd ring                |
+ *          +-----------+--------------+-------------------------------+
+ */
+
+/* cbd channel */
+#define CBD_OP_ALIGN_SIZE	sizeof(u64)
+#define CBDC_META_SIZE		(1024 * CBD_PAGE_SIZE)
+#define CBDC_CMDR_RESERVED	CBD_OP_ALIGN_SIZE
+#define CBDC_CMPR_RESERVED	sizeof(struct cbd_ce)
+
+#define CBDC_CTRL_OFF		0
+#define CBDC_CTRL_SIZE		CBD_PAGE_SIZE
+#define CBDC_COMPR_OFF		(CBDC_CTRL_OFF + CBDC_CTRL_SIZE)
+#define CBDC_COMPR_SIZE		(sizeof(struct cbd_ce) * 1024)
+#define CBDC_CMDR_OFF		(CBDC_COMPR_OFF + CBDC_COMPR_SIZE)
+#define CBDC_CMDR_SIZE		(CBDC_META_SIZE - CBDC_CMDR_OFF)
+
+#define CBDC_DATA_OFF		(CBDC_CMDR_OFF + CBDC_CMDR_SIZE)
+#define CBDC_DATA_SIZE		(16 * 1024 * 1024)
+#define CBDC_DATA_MASK		0xFFFFFF
+
+#define CBDC_UPDATE_CMDR_HEAD(head, used, size) (head = ((head % size) + used) % size)
+#define CBDC_UPDATE_CMDR_TAIL(tail, used, size) (tail = ((tail % size) + used) % size)
+
+#define CBDC_UPDATE_COMPR_HEAD(head, used, size) (head = ((head % size) + used) % size)
+#define CBDC_UPDATE_COMPR_TAIL(tail, used, size) (tail = ((tail % size) + used) % size)
+
+/* cbd transport */
+#define CBD_TRANSPORT_MAGIC		0x9a6c676896C596EFULL
+#define CBD_TRANSPORT_VERSION		1
+
+#define CBDT_INFO_OFF			0
+#define CBDT_INFO_SIZE			CBD_PAGE_SIZE
+
+#define CBDT_HOST_AREA_OFF		(CBDT_INFO_OFF + CBDT_INFO_SIZE)
+#define CBDT_HOST_INFO_SIZE		CBD_PAGE_SIZE
+#define CBDT_HOST_NUM			16
+
+#define CBDT_BACKEND_AREA_OFF		(CBDT_HOST_AREA_OFF + (CBDT_HOST_INFO_SIZE * CBDT_HOST_NUM))
+#define CBDT_BACKEND_INFO_SIZE		CBD_PAGE_SIZE
+#define CBDT_BACKEND_NUM		16
+
+#define CBDT_BLKDEV_AREA_OFF		(CBDT_BACKEND_AREA_OFF + (CBDT_BACKEND_INFO_SIZE * CBDT_BACKEND_NUM))
+#define CBDT_BLKDEV_INFO_SIZE		CBD_PAGE_SIZE
+#define CBDT_BLKDEV_NUM			16
+
+#define CBDT_CHANNEL_AREA_OFF		(CBDT_BLKDEV_AREA_OFF + (CBDT_BLKDEV_INFO_SIZE * CBDT_BLKDEV_NUM))
+#define CBDT_CHANNEL_SIZE		(CBDC_META_SIZE + CBDC_DATA_SIZE)
+#define CBDT_CHANNEL_NUM		16
+
+#define CBD_TRASNPORT_SIZE		(CBDT_CHANNEL_AREA_OFF + CBDT_CHANNEL_SIZE * CBDT_CHANNEL_NUM)
+
+/*
+ * CBD structure diagram:
+ *
+ *                                        +--------------+
+ *                                        | cbd_transport|                                               +----------+
+ *                                        +--------------+                                               | cbd_host |
+ *                                        |              |                                               +----------+
+ *                                        |   host       +---------------------------------------------->|          |
+ *                   +--------------------+   backends   |                                               | hostname |
+ *                   |                    |   devices    +------------------------------------------+    |          |
+ *                   |                    |              |                                          |    +----------+
+ *                   |                    +--------------+                                          |
+ *                   |                                                                              |
+ *                   |                                                                              |
+ *                   |                                                                              |
+ *                   |                                                                              |
+ *                   |                                                                              |
+ *                   v                                                                              v
+ *             +------------+     +-----------+     +------+                                  +-----------+      +-----------+     +------+
+ *             | cbd_backend+---->|cbd_backend+---->| NULL |                                  | cbd_blkdev+----->| cbd_blkdev+---->| NULL |
+ *             +------------+     +-----------+     +------+                                  +-----------+      +-----------+     +------+
+ *      +------+  handlers  |     |  handlers |                                        +------+  queues   |      |  queues   |
+ *      |      +------------+     +-----------+                                        |      +-----------+      +-----------+
+ *      |                                                                              |
+ *      |                                                                              |
+ *      |                                                                              |
+ *      |                                                                              |
+ *      |      +-------------+       +-------------+           +------+                |      +-----------+      +-----------+     +------+
+ *      +----->| cbd_handler +------>| cbd_handler +---------->| NULL |                +----->| cbd_queue +----->| cbd_queue +---->| NULL |
+ *             +-------------+       +-------------+           +------+                       +-----------+      +-----------+     +------+
+ *      +------+ channel     |       |   channel   |                                   +------+  channel  |      |  channel  |
+ *      |      +-------------+       +-------------+                                   |      +-----------+      +-----------+
+ *      |                                                                              |
+ *      |                                                                              |
+ *      |                                                                              |
+ *      |                                                                              v
+ *      |                                                        +-----------------------+
+ *      +------------------------------------------------------->|      cbd_channel      |
+ *                                                               +-----------------------+
+ *                                                               | channel_id            |
+ *                                                               | cmdr (cmd ring)       |
+ *                                                               | compr (complete ring) |
+ *                                                               | data (data area)      |
+ *                                                               |                       |
+ *                                                               +-----------------------+
+ */
+
+#define CBD_DEVICE(OBJ)					\
+struct cbd_## OBJ ##_device {				\
+	struct device dev;				\
+	struct cbd_transport *cbdt;			\
+	struct cbd_## OBJ ##_info *OBJ##_info;		\
+};							\
+							\
+struct cbd_## OBJ ##s_device {				\
+	struct device OBJ ##s_dev;			\
+	struct cbd_## OBJ ##_device OBJ ##_devs[];	\
+};
+
+
+/* cbd_worker_cfg*/
+struct cbd_worker_cfg {
+	u32			busy_retry_cur;
+	u32			busy_retry_count;
+	u32			busy_retry_max;
+	u32			busy_retry_min;
+	u64			busy_retry_interval;
+};
+
+static inline void cbdwc_init(struct cbd_worker_cfg *cfg)
+{
+	/* init cbd_worker_cfg with default values */
+	cfg->busy_retry_cur = 0;
+	cfg->busy_retry_count = 100;
+	cfg->busy_retry_max = cfg->busy_retry_count * 2;
+	cfg->busy_retry_min = 0;
+	cfg->busy_retry_interval = 1;			/* 1us */
+}
+
+/* reset retry_cur and increase busy_retry_count */
+static inline void cbdwc_hit(struct cbd_worker_cfg *cfg)
+{
+	u32 delta;
+
+	cfg->busy_retry_cur = 0;
+
+	if (cfg->busy_retry_count == cfg->busy_retry_max)
+		return;
+
+	/* retry_count increase by 1/16 */
+	delta = cfg->busy_retry_count >> 4;
+	if (!delta)
+		delta = (cfg->busy_retry_max + cfg->busy_retry_min) >> 1;
+
+	cfg->busy_retry_count += delta;
+
+	if (cfg->busy_retry_count > cfg->busy_retry_max)
+		cfg->busy_retry_count = cfg->busy_retry_max;
+
+	return;
+}
+
+/* reset retry_cur and decrease busy_retry_count */
+static inline void cbdwc_miss(struct cbd_worker_cfg *cfg)
+{
+	u32 delta;
+
+	cfg->busy_retry_cur = 0;
+
+	if (cfg->busy_retry_count == cfg->busy_retry_min)
+		return;
+
+	/* retry_count decrease by 1/16 */
+	delta = cfg->busy_retry_count >> 4;
+	if (!delta)
+		delta = cfg->busy_retry_count;
+
+	cfg->busy_retry_count -= delta;
+
+	return;
+}
+
+static inline bool cbdwc_need_retry(struct cbd_worker_cfg *cfg)
+{
+	if (++cfg->busy_retry_cur < cfg->busy_retry_count) {
+		cpu_relax();
+		fsleep(cfg->busy_retry_interval);
+		return true;
+	}
+
+	return false;
+}
+
+/* cbd_transport */
+#define CBDT_INFO_F_BIGENDIAN		1 << 0
+
+struct cbd_transport_info {
+	__le64 magic;
+	__le16 version;
+	__le16 flags;
+
+	u64 host_area_off;
+	u32 host_info_size;
+	u32 host_num;
+
+	u64 backend_area_off;
+	u32 backend_info_size;
+	u32 backend_num;
+
+	u64 blkdev_area_off;
+	u32 blkdev_info_size;
+	u32 blkdev_num;
+
+	u64 channel_area_off;
+	u32 channel_size;
+	u32 channel_num;
+};
+
+struct cbd_transport {
+	u16	id;
+	struct device device;
+	struct mutex lock;
+
+	struct cbd_transport_info *transport_info;
+
+	struct cbd_host *host;
+	struct list_head backends;
+	struct list_head devices;
+
+	struct cbd_hosts_device *cbd_hosts_dev;
+	struct cbd_channels_device *cbd_channels_dev;
+	struct cbd_backends_device *cbd_backends_dev;
+	struct cbd_blkdevs_device *cbd_blkdevs_dev;
+
+	struct dax_device *dax_dev;
+	struct bdev_handle   *bdev_handle;
+};
+
+struct cbdt_register_options {
+	char hostname[CBD_NAME_LEN];
+	char path[CBD_PATH_LEN];
+	u16 format:1;
+	u16 force:1;
+	u16 unused:15;
+};
+
+struct cbd_blkdev;
+struct cbd_backend;
+
+int cbdt_register(struct cbdt_register_options *opts);
+int cbdt_unregister(u32 transport_id);
+
+struct cbd_host_info *cbdt_get_host_info(struct cbd_transport *cbdt, u32 id);
+struct cbd_backend_info *cbdt_get_backend_info(struct cbd_transport *cbdt, u32 id);
+struct cbd_blkdev_info *cbdt_get_blkdev_info(struct cbd_transport *cbdt, u32 id);
+struct cbd_channel_info *cbdt_get_channel_info(struct cbd_transport *cbdt, u32 id);
+
+int cbdt_get_empty_host_id(struct cbd_transport *cbdt, u32 *id);
+int cbdt_get_empty_backend_id(struct cbd_transport *cbdt, u32 *id);
+int cbdt_get_empty_blkdev_id(struct cbd_transport *cbdt, u32 *id);
+int cbdt_get_empty_channel_id(struct cbd_transport *cbdt, u32 *id);
+
+void cbdt_add_backend(struct cbd_transport *cbdt, struct cbd_backend *cbdb);
+void cbdt_del_backend(struct cbd_transport *cbdt, struct cbd_backend *cbdb);
+struct cbd_backend *cbdt_get_backend(struct cbd_transport *cbdt, u32 id);
+void cbdt_add_blkdev(struct cbd_transport *cbdt, struct cbd_blkdev *blkdev);
+struct cbd_blkdev *cbdt_fetch_blkdev(struct cbd_transport *cbdt, u32 id);
+
+struct page *cbdt_page(struct cbd_transport *cbdt, u64 transport_off);
+void cbdt_flush_range(struct cbd_transport *cbdt, void *pos, u64 size);
+
+/* cbd_host */
+CBD_DEVICE(host);
+
+enum cbd_host_state {
+	cbd_host_state_none	= 0,
+	cbd_host_state_running
+};
+
+struct cbd_host_info {
+	u8	state;
+	u64	alive_ts;
+	char	hostname[CBD_NAME_LEN];
+};
+
+struct cbd_host {
+	u32			host_id;
+	struct cbd_transport	*cbdt;
+
+	struct cbd_host_device	*dev;
+	struct cbd_host_info	*host_info;
+	struct delayed_work	hb_work; /* heartbeat work */
+};
+
+int cbd_host_register(struct cbd_transport *cbdt, char *hostname);
+int cbd_host_unregister(struct cbd_transport *cbdt);
+
+/* cbd_channel */
+CBD_DEVICE(channel);
+
+enum cbdc_blkdev_state {
+	cbdc_blkdev_state_none		= 0,
+	cbdc_blkdev_state_running,
+	cbdc_blkdev_state_stopped,
+};
+
+enum cbdc_backend_state {
+	cbdc_backend_state_none		= 0,
+	cbdc_backend_state_running,
+	cbdc_backend_state_stopped,
+};
+
+enum cbd_channel_state {
+	cbd_channel_state_none		= 0,
+	cbd_channel_state_running,
+};
+
+struct cbd_channel_info {
+	u8	state;
+
+	u8	blkdev_state;
+	u32	blkdev_id;
+
+	u8	backend_state;
+	u32	backend_id;
+
+	u32	cmdr_off;
+	u32 	cmdr_size;
+	u32 	cmd_head;
+	u32 	cmd_tail;
+
+	u32 	compr_head;
+	u32 	compr_tail;
+	u32 	compr_off;
+	u32 	compr_size;
+};
+
+struct cbd_channel {
+	u32				channel_id;
+	struct cbd_channel_deivce	*dev;
+	struct cbd_channel_info		*channel_info;
+
+	struct cbd_transport		*cbdt;
+
+	struct page			*ctrl_page;
+
+	void				*cmdr;
+	void				*compr;
+	void				*data;
+
+	u32				data_size;
+	u32				data_head;
+	u32				data_tail;
+
+	spinlock_t			cmdr_lock;
+	spinlock_t			compr_lock;
+};
+
+void cbd_channel_init(struct cbd_channel *channel, struct cbd_transport *cbdt, u32 channel_id);
+void cbdc_copy_from_bio(struct cbd_channel *channel,
+		u32 data_off, u32 data_len, struct bio *bio);
+void cbdc_copy_to_bio(struct cbd_channel *channel,
+		u32 data_off, u32 data_len, struct bio *bio);
+void cbdc_flush_ctrl(struct cbd_channel *channel);
+
+/* cbd_handler */
+struct cbd_handler {
+	struct cbd_backend	*cbdb;
+	struct cbd_channel_info *channel_info;
+
+	struct cbd_channel	channel;
+
+	u32			se_to_handle;
+
+	struct delayed_work	handle_work;
+	struct cbd_worker_cfg	handle_worker_cfg;
+
+	struct list_head	handlers_node;
+	struct bio_set		bioset;
+	struct workqueue_struct *handle_wq;
+};
+
+void cbd_handler_destroy(struct cbd_handler *handler);
+int cbd_handler_create(struct cbd_backend *cbdb, u32 channel_id);
+
+/* cbd_backend */
+CBD_DEVICE(backend);
+
+enum cbd_backend_state {
+	cbd_backend_state_none	= 0,
+	cbd_backend_state_running,
+};
+
+#define CBDB_BLKDEV_COUNT_MAX	1
+
+struct cbd_backend_info {
+	u8	state;
+	u32	host_id;
+	u32	blkdev_count;
+	u64	alive_ts;
+	u64	dev_size; /* nr_sectors */
+	char	path[CBD_PATH_LEN];
+};
+
+struct cbd_backend {
+	u32			backend_id;
+	char			path[CBD_PATH_LEN];
+	struct cbd_transport	*cbdt;
+	struct cbd_backend_info *backend_info;
+	struct mutex 		lock;
+
+	struct block_device	*bdev;
+	struct bdev_handle	*bdev_handle;
+
+	struct workqueue_struct	*task_wq;  /* workqueue for request work */
+	struct delayed_work	state_work;
+	struct delayed_work	hb_work; /* heartbeat work */
+
+	struct list_head	node; /* cbd_transport->backends */
+	struct list_head	handlers;
+
+	struct cbd_backend_device *backend_device;
+};
+
+int cbd_backend_start(struct cbd_transport *cbdt, char *path);
+int cbd_backend_stop(struct cbd_transport *cbdt, u32 backend_id);
+void cbdb_add_handler(struct cbd_backend *cbdb, struct cbd_handler *handler);
+void cbdb_del_handler(struct cbd_backend *cbdb, struct cbd_handler *handler);
+
+/* cbd_queue */
+enum cbd_op {
+	CBD_OP_PAD = 0,
+	CBD_OP_WRITE,
+	CBD_OP_READ,
+	CBD_OP_DISCARD,
+	CBD_OP_WRITE_ZEROS,
+	CBD_OP_FLUSH,
+};
+
+struct cbd_se_hdr {
+	u32 len_op;
+	u32 flags;
+
+};
+
+struct cbd_se {
+	struct cbd_se_hdr	header;
+	u64			priv_data;	// pointer to cbd_request
+
+	u64			offset;
+	u32			len;
+
+	u32			data_off;
+	u32			data_len;
+};
+
+
+struct cbd_ce {
+	u64		priv_data;	// copied from submit entry
+	u32		result;
+	u32		flags;
+};
+
+
+struct cbd_request {
+	struct cbd_queue	*cbdq;
+
+	struct cbd_se		*se;
+	struct cbd_ce		*ce;
+	struct request		*req;
+
+	enum cbd_op		op;
+	u64			req_tid;
+	struct list_head	inflight_reqs_node;
+
+	u32			data_off;
+	u32			data_len;
+
+	struct work_struct	work;
+};
+
+#define CBD_OP_MASK 0xff
+#define CBD_OP_SHIFT 8
+
+static inline enum cbd_op cbd_se_hdr_get_op(__le32 len_op)
+{
+       return (enum cbd_op)(len_op & CBD_OP_MASK);
+}
+
+static inline void cbd_se_hdr_set_op(u32 *len_op, enum cbd_op op)
+{
+       *len_op &= ~CBD_OP_MASK;
+       *len_op |= (op & CBD_OP_MASK);
+}
+
+static inline u32 cbd_se_hdr_get_len(u32 len_op)
+{
+	return len_op >> CBD_OP_SHIFT;
+}
+
+static inline void cbd_se_hdr_set_len(u32 *len_op, u32 len)
+{
+	*len_op &= CBD_OP_MASK;
+	*len_op |= (len << CBD_OP_SHIFT);
+}
+
+#define CBD_SE_HDR_DONE	1
+
+static inline bool cbd_se_hdr_flags_test(struct cbd_se *se, u32 bit)
+{
+	return (se->header.flags & bit);
+}
+
+static inline void cbd_se_hdr_flags_set(struct cbd_se *se, u32 bit)
+{
+	se->header.flags |= bit;
+}
+
+enum cbd_queue_state {
+	cbd_queue_state_none	= 0,
+	cbd_queue_state_running
+};
+
+struct cbd_queue {
+	struct cbd_blkdev	*cbd_blkdev;
+
+	bool			inited;
+	int			index;
+
+	struct list_head	inflight_reqs;
+	spinlock_t		inflight_reqs_lock;
+	u64			req_tid;
+
+	u32			*released_extents;
+
+	u32			channel_id;
+	struct cbd_channel_info	*channel_info;
+	struct cbd_channel	channel;
+	struct workqueue_struct	*task_wq;  /* workqueue for request work */
+
+	atomic_t		state;
+
+	struct delayed_work	complete_work;
+	struct cbd_worker_cfg	complete_worker_cfg;
+};
+
+int cbd_queue_start(struct cbd_queue *cbdq);
+void cbd_queue_stop(struct cbd_queue *cbdq);
+extern const struct blk_mq_ops cbd_mq_ops;
+
+/* cbd_blkdev */
+CBD_DEVICE(blkdev);
+
+enum cbd_blkdev_state {
+	cbd_blkdev_state_none	= 0,
+	cbd_blkdev_state_running
+};
+
+struct cbd_blkdev_info {
+	u8	state;
+	u64	alive_ts;
+	u32	backend_id;
+	u32	host_id;
+	u32	mapped_id;
+};
+
+struct cbd_blkdev {
+	u32			blkdev_id; /* index in transport blkdev area */
+	u32			backend_id;
+	int			mapped_id; /* id in block device such as: /dev/cbd0 */
+
+	int			major;		/* blkdev assigned major */
+	int			minor;
+	struct gendisk		*disk;		/* blkdev's gendisk and rq */
+
+	spinlock_t		lock;		/* open_count */
+	struct list_head	node;
+	struct mutex		state_lock;
+	struct delayed_work	hb_work; /* heartbeat work */
+
+	/* Block layer tags. */
+	struct blk_mq_tag_set	tag_set;
+
+	unsigned long		open_count;	/* protected by lock */
+
+	uint32_t		num_queues;
+	struct cbd_queue	*queues;
+
+	u64			dev_size;
+	u64			dev_features;
+	u32			io_timeout;
+
+	u8			state;
+	u32			state_flags;
+	struct kref		kref;
+
+	void			*cmdr;
+	void			*compr;
+	spinlock_t		cmdr_lock;
+	spinlock_t		compr_lock;
+	void			*data;
+
+	struct cbd_blkdev_device *blkdev_dev;
+	struct cbd_blkdev_info *blkdev_info;
+
+	struct cbd_transport *cbdt;
+};
+
+int cbd_blkdev_init(void);
+void cbd_blkdev_exit(void);
+int cbd_blkdev_start(struct cbd_transport *cbdt, u32 backend_id, u32 queues);
+int cbd_blkdev_stop(struct cbd_transport *cbdt, u32 devid);
+
+extern struct workqueue_struct	*cbd_wq;
+
+#define cbd_setup_device(DEV, PARENT, TYPE, fmt, ...)		\
+do {								\
+	device_initialize(DEV);					\
+	device_set_pm_not_required(DEV);			\
+	dev_set_name(DEV, fmt, ##__VA_ARGS__);			\
+	DEV->parent = PARENT;					\
+	DEV->type = TYPE;					\
+								\
+	ret = device_add(DEV);					\
+} while (0)
+
+#define CBD_OBJ_HEARTBEAT(OBJ)								\
+static void OBJ##_hb_workfn(struct work_struct *work)					\
+{											\
+	struct cbd_##OBJ *obj = container_of(work, struct cbd_##OBJ, hb_work.work);	\
+	struct cbd_##OBJ##_info *info = obj->OBJ##_info;				\
+											\
+	info->alive_ts = ktime_get_real();						\
+	cbdt_flush_range(obj->cbdt, info, sizeof(*info));				\
+											\
+	queue_delayed_work(cbd_wq, &obj->hb_work, CBD_HB_INTERVAL);			\
+}											\
+											\
+static bool OBJ##_info_is_alive(struct cbd_##OBJ##_info *info)				\
+{											\
+	ktime_t oldest, ts;								\
+											\
+	ts = info->alive_ts;								\
+	oldest = ktime_sub_ms(ktime_get_real(), CBD_HB_TIMEOUT);			\
+											\
+	if (ktime_after(ts, oldest))							\
+		return true;								\
+											\
+	return false;									\
+}											\
+											\
+static ssize_t cbd_##OBJ##_alive_show(struct device *dev,				\
+			       struct device_attribute *attr,				\
+			       char *buf)						\
+{											\
+	struct cbd_##OBJ##_device *_dev;						\
+											\
+	_dev = container_of(dev, struct cbd_##OBJ##_device, dev);			\
+											\
+	cbdt_flush_range(_dev->cbdt, _dev->OBJ##_info, sizeof(*_dev->OBJ##_info));	\
+	if (OBJ##_info_is_alive(_dev->OBJ##_info))					\
+		return sprintf(buf, "true\n");						\
+											\
+	return sprintf(buf, "false\n");							\
+}											\
+											\
+static DEVICE_ATTR(alive, 0400, cbd_##OBJ##_alive_show, NULL);				\
+
+#endif /* _CBD_INTERNAL_H */
diff --git a/drivers/block/cbd/cbd_main.c b/drivers/block/cbd/cbd_main.c
new file mode 100644
index 000000000000..0a87c95d749d
--- /dev/null
+++ b/drivers/block/cbd/cbd_main.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright(C) 2024, Dongsheng Yang <dongsheng.yang.linux@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/blk-mq.h>
+#include <linux/blkdev.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/blk-mq.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <net/genetlink.h>
+
+#include <linux/types.h>
+
+#include "cbd_internal.h"
+
+struct workqueue_struct	*cbd_wq;
+
+enum {
+	CBDT_REG_OPT_ERR		= 0,
+	CBDT_REG_OPT_FORCE,
+	CBDT_REG_OPT_FORMAT,
+	CBDT_REG_OPT_PATH,
+	CBDT_REG_OPT_HOSTNAME,
+};
+
+static const match_table_t register_opt_tokens = {
+	{ CBDT_REG_OPT_FORCE,		"force=%u" },
+	{ CBDT_REG_OPT_FORMAT,		"format=%u" },
+	{ CBDT_REG_OPT_PATH,		"path=%s" },
+	{ CBDT_REG_OPT_HOSTNAME,	"hostname=%s" },
+	{ CBDT_REG_OPT_ERR,		NULL	}
+};
+
+static int parse_register_options(
+		char *buf,
+		struct cbdt_register_options *opts)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *o, *p;
+	int token, ret = 0;
+
+	o = buf;
+
+	while ((p = strsep(&o, ",\n")) != NULL) {
+		if (!*p)
+			continue;
+
+		token = match_token(p, register_opt_tokens, args);
+		switch (token) {
+		case CBDT_REG_OPT_PATH:
+			if (match_strlcpy(opts->path, &args[0],
+			        CBD_PATH_LEN) == 0) {
+			        ret = -EINVAL;
+			        break;
+			}
+			break;
+		case CBDT_REG_OPT_FORCE:
+			if (match_uint(args, &token) || token != 1) {
+				ret = -EINVAL;
+				goto out;
+			}
+			opts->force = 1;
+			break;
+		case CBDT_REG_OPT_FORMAT:
+			if (match_uint(args, &token) || token != 1) {
+				ret = -EINVAL;
+				goto out;
+			}
+			opts->format = 1;
+			break;
+		case CBDT_REG_OPT_HOSTNAME:
+			if (match_strlcpy(opts->hostname, &args[0],
+			        CBD_NAME_LEN) == 0) {
+			        ret = -EINVAL;
+			        break;
+			}
+			break;
+		default:
+			pr_err("unknown parameter or missing value '%s'\n", p);
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+static ssize_t transport_unregister_store(const struct bus_type *bus, const char *ubuf,
+				      size_t size)
+{
+	int ret;
+	u32 transport_id;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (sscanf(ubuf, "transport_id=%u", &transport_id) != 1) {
+		return -EINVAL;
+	}
+
+	return size;
+}
+
+static ssize_t transport_register_store(const struct bus_type *bus, const char *ubuf,
+				      size_t size)
+{
+	int ret;
+	char *buf;
+	struct cbdt_register_options opts = { 0 };
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	buf = kmemdup(ubuf, size + 1, GFP_KERNEL);
+	if (IS_ERR(buf)) {
+		pr_err("failed to dup buf for adm option: %d", (int)PTR_ERR(buf));
+		return PTR_ERR(buf);
+	}
+	buf[size] = '\0';
+
+	ret = parse_register_options(buf, &opts);
+	if (ret < 0) {
+		kfree(buf);
+		return ret;
+	}
+	kfree(buf);
+
+	return size;
+}
+
+static BUS_ATTR_WO(transport_unregister);
+static BUS_ATTR_WO(transport_register);
+
+static struct attribute *cbd_bus_attrs[] = {
+	&bus_attr_transport_unregister.attr,
+	&bus_attr_transport_register.attr,
+	NULL,
+};
+
+static const struct attribute_group cbd_bus_group = {
+	.attrs = cbd_bus_attrs,
+};
+__ATTRIBUTE_GROUPS(cbd_bus);
+
+struct bus_type cbd_bus_type = {
+	.name		= "cbd",
+	.bus_groups	= cbd_bus_groups,
+};
+
+static void cbd_root_dev_release(struct device *dev)
+{
+}
+
+struct device cbd_root_dev = {
+	.init_name =    "cbd",
+	.release =      cbd_root_dev_release,
+};
+
+static int __init cbd_init(void)
+{
+	int ret;
+
+	cbd_wq = alloc_workqueue(CBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
+	if (!cbd_wq) {
+		return -ENOMEM;
+	}
+
+	ret = device_register(&cbd_root_dev);
+	if (ret < 0) {
+		put_device(&cbd_root_dev);
+		goto destroy_wq;
+	}
+
+	ret = bus_register(&cbd_bus_type);
+	if (ret < 0) {
+		goto device_unregister;
+	}
+
+	return 0;
+
+bus_unregister:
+	bus_unregister(&cbd_bus_type);
+device_unregister:
+	device_unregister(&cbd_root_dev);
+destroy_wq:
+	destroy_workqueue(cbd_wq);
+
+	return ret;
+}
+
+static void cbd_exit(void)
+{
+	bus_unregister(&cbd_bus_type);
+	device_unregister(&cbd_root_dev);
+
+	destroy_workqueue(cbd_wq);
+
+	return;
+}
+
+MODULE_AUTHOR("Dongsheng Yang <dongsheng.yang.linux@gmail.com>");
+MODULE_DESCRIPTION("CXL(Compute Express Link) Block Device");
+MODULE_LICENSE("GPL v2");
+module_init(cbd_init);
+module_exit(cbd_exit);
-- 
2.34.1


  reply	other threads:[~2024-04-22 10:53 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-22  7:15 [PATCH RFC 0/7] block: Introduce CBD (CXL Block Device) Dongsheng Yang
2024-04-22  7:16 ` Dongsheng Yang [this message]
2024-04-22 18:39   ` [PATCH 1/7] block: Init for CBD(CXL " Randy Dunlap
2024-04-22 22:41     ` Dongsheng Yang
2024-04-24  3:58   ` Chaitanya Kulkarni
2024-04-24  8:36     ` Dongsheng Yang
2024-04-22  7:16 ` [PATCH 2/7] cbd: introduce cbd_transport Dongsheng Yang
2024-04-24  4:08   ` Chaitanya Kulkarni
2024-04-24  8:43     ` Dongsheng Yang
2024-04-22  7:16 ` [PATCH 3/7] cbd: introduce cbd_channel Dongsheng Yang
2024-04-22  7:16 ` [PATCH 4/7] cbd: introduce cbd_host Dongsheng Yang
2024-04-25  5:51   ` [EXTERNAL] " Bharat Bhushan
2024-04-22  7:16 ` [PATCH 5/7] cbd: introuce cbd_backend Dongsheng Yang
2024-04-24  5:03   ` Chaitanya Kulkarni
2024-04-24  8:36     ` Dongsheng Yang
2024-04-25  5:46   ` [EXTERNAL] " Bharat Bhushan
2024-04-22  7:16 ` [PATCH 7/7] cbd: add related sysfs files in transport register Dongsheng Yang
2024-04-25  5:24   ` [EXTERNAL] " Bharat Bhushan
2024-04-22 22:42 ` [PATCH 6/7] cbd: introduce cbd_blkdev Dongsheng Yang
2024-04-23  7:27   ` Dongsheng Yang
2024-04-24  4:29 ` [PATCH RFC 0/7] block: Introduce CBD (CXL Block Device) Dan Williams
2024-04-24  6:33   ` Dongsheng Yang
2024-04-24 15:14     ` Gregory Price
2024-04-26  1:25       ` Dongsheng Yang
2024-04-26 13:48         ` Gregory Price
2024-04-26 14:53           ` Dongsheng Yang
2024-04-26 16:14             ` Gregory Price
2024-04-28  5:47               ` Dongsheng Yang
2024-04-28 16:44                 ` Gregory Price
2024-04-28 16:55                 ` John Groves
2024-05-03  9:52                   ` Jonathan Cameron
2024-05-08 11:39                     ` Dongsheng Yang
2024-05-08 12:11                       ` Jonathan Cameron
2024-05-08 13:03                         ` Dongsheng Yang
2024-05-08 15:44                           ` Jonathan Cameron
2024-05-09 11:24                             ` Dongsheng Yang
2024-05-09 12:21                               ` Jonathan Cameron
2024-05-09 13:03                                 ` Dongsheng Yang
2024-05-21 18:41                                   ` Dan Williams
2024-05-22  6:17                                     ` Dongsheng Yang
2024-05-29 15:25                                       ` Gregory Price
2024-05-30  6:59                                         ` Dongsheng Yang
2024-05-30 13:38                                           ` Jonathan Cameron
2024-06-01  3:22                                             ` Dan Williams
2024-06-03 12:48                                               ` Jonathan Cameron
2024-06-03 17:28                                                 ` James Morse
2024-06-04 14:26                                                   ` Jonathan Cameron
2024-05-31 14:23                                           ` Gregory Price
2024-06-03  1:33                                             ` Dongsheng Yang
2024-04-30  0:34                 ` Dan Williams
2024-04-24 18:08     ` Dan Williams
     [not found]       ` <539c1323-68f9-d753-a102-692b69049c20@easystack.cn>
2024-04-30  0:10         ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240422071606.52637-2-dongsheng.yang@easystack.cn \
    --to=dongsheng.yang@easystack.cn \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=dongsheng.yang.linux@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).