From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>,
	io-uring@vger.kernel.org, linux-block@vger.kernel.org
Cc: linux-kernel@vger.kernel.org,
	Miklos Szeredi <mszeredi@redhat.com>,
	ZiyangZhang <ZiyangZhang@linux.alibaba.com>,
	Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>,
	Bernd Schubert <bschubert@ddn.com>,
	Pavel Begunkov <asml.silence@gmail.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Ming Lei <ming.lei@redhat.com>
Subject: [PATCH V4 15/17] block: ublk_drv: add read()/write() support for ublk char device
Date: Fri, 24 Mar 2023 21:58:06 +0800
Message-ID: <20230324135808.855245-16-ming.lei@redhat.com>
In-Reply-To: <20230324135808.855245-1-ming.lei@redhat.com>

We are going to support zero copy via the fused uring command. With zero
copy, userspace can no longer read from or write to the io buffer
directly, which is inflexible for applications:

1) some targets need to zero the buffer explicitly, such as when reading
an unmapped qcow2 cluster

2) some targets need to support passthrough commands, such as zoned
report zones, and still need to read/write the io buffer

Support pread()/pwrite() on the ublk char device for reading/writing the
request io buffer, so the ublk server can handle the above cases easily.
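
For illustration, a minimal userspace sketch (not part of this patch; the
helper names, fd and buffer handling are made up, and the bit layout only
mirrors the ublk_pos() helper added to ublk_cmd.h below):

#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

#define TAG_BITS	12
#define BUFS_SIZE_BITS	42			/* per-queue buffer space */
#define BUF_SIZE_BITS	(BUFS_SIZE_BITS - TAG_BITS)

/* encode (q_id, tag, offset) into a file position on /dev/ublkcN */
static inline uint64_t io_buf_pos(uint16_t q_id, uint16_t tag, uint32_t off)
{
	return ((uint64_t)q_id << BUFS_SIZE_BITS) |
	       (((uint64_t)tag << BUF_SIZE_BITS) + off);
}

/* fetch a WRITE request's payload from the kernel, e.g. for passthrough */
static ssize_t read_io_buf(int cdev_fd, uint16_t q_id, uint16_t tag,
			   void *buf, size_t len)
{
	return pread(cdev_fd, buf, len, io_buf_pos(q_id, tag, 0));
}

/* fill a READ request's payload, e.g. zeroing an unmapped qcow2 cluster */
static ssize_t fill_io_buf(int cdev_fd, uint16_t q_id, uint16_t tag,
			   const void *buf, size_t len)
{
	return pwrite(cdev_fd, buf, len, io_buf_pos(q_id, tag, 0));
}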

This also helps make zero copy the primary option: non-zero-copy becomes
a legacy code path, since the added read()/write() covers the
non-zero-copy case.
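
For reference, a self-contained sanity check of the pos layout added to
ublk_cmd.h below, using made-up values: 12 tag bits and 42 bits of
per-queue buffer space leave 30 offset bits, i.e. at most 1GB per single
io buffer.

#include <assert.h>
#include <stdint.h>

#define TAG_BITS	12
#define BUFS_SIZE_BITS	42
#define BUF_SIZE_BITS	(BUFS_SIZE_BITS - TAG_BITS)	/* 30 */

int main(void)
{
	uint16_t q_id = 2, tag = 5;
	uint32_t off = 0x1000;

	/* same arithmetic as ublk_pos(q_id, tag, off) */
	uint64_t pos = ((uint64_t)q_id << BUFS_SIZE_BITS) |
		       (((uint64_t)tag << BUF_SIZE_BITS) + off);

	assert(pos == 0x80140001000ULL);
	/* ublk_pos_to_hwq() */
	assert((pos >> BUFS_SIZE_BITS) == q_id);
	/* ublk_pos_to_tag() */
	assert(((pos & ((1ULL << BUFS_SIZE_BITS) - 1)) >> BUF_SIZE_BITS) == tag);
	/* ublk_pos_to_buf_offset() */
	assert((pos & ((1ULL << BUF_SIZE_BITS) - 1)) == off);
	return 0;
}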

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c      | 131 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/ublk_cmd.h |  31 +++++++-
 2 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 26a14c54da1d..e6b528750b3c 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1317,6 +1317,36 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
 	ublk_queue_cmd(ubq, req);
 }
 
+static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
+		struct ublk_queue *ubq, int tag, size_t offset)
+{
+	struct request *req;
+
+	if (!ublk_support_zc(ubq))
+		return NULL;
+
+	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	if (!req)
+		return NULL;
+
+	if (!ublk_get_req_ref(ubq, req))
+		return NULL;
+
+	if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
+		goto fail_put;
+
+	if (!ublk_rq_has_data(req))
+		goto fail_put;
+
+	if (offset > blk_rq_bytes(req))
+		goto fail_put;
+
+	return req;
+fail_put:
+	ublk_put_req_ref(ubq, req);
+	return NULL;
+}
+
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -1418,11 +1448,112 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	return -EIOCBQUEUED;
 }
 
+static inline bool ublk_check_ubuf_dir(const struct request *req,
+		int ubuf_dir)
+{
+	/* copy ubuf to request pages */
+	if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
+		return true;
+
+	/* copy request pages to ubuf */
+	if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
+		return true;
+
+	return false;
+}
+
+static struct request *ublk_check_and_get_req(struct kiocb *iocb,
+		struct iov_iter *iter, size_t *off, int dir)
+{
+	struct ublk_device *ub = iocb->ki_filp->private_data;
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	u16 tag, q_id;
+
+	if (!ub)
+		return ERR_PTR(-EACCES);
+
+	if (!user_backed_iter(iter))
+		return ERR_PTR(-EACCES);
+
+	if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+		return ERR_PTR(-EACCES);
+
+	tag = ublk_pos_to_tag(iocb->ki_pos);
+	q_id = ublk_pos_to_hwq(iocb->ki_pos);
+	buf_off = ublk_pos_to_buf_offset(iocb->ki_pos);
+
+	if (q_id >= ub->dev_info.nr_hw_queues)
+		return ERR_PTR(-EINVAL);
+
+	ubq = ublk_get_queue(ub, q_id);
+	if (!ubq)
+		return ERR_PTR(-EINVAL);
+
+	if (tag >= ubq->q_depth)
+		return ERR_PTR(-EINVAL);
+
+	req = __ublk_check_and_get_req(ub, ubq, tag, buf_off);
+	if (!req)
+		return ERR_PTR(-EINVAL);
+
+	if (!req->mq_hctx || !req->mq_hctx->driver_data)
+		goto fail;
+
+	if (!ublk_check_ubuf_dir(req, dir))
+		goto fail;
+
+	*off = buf_off;
+	return req;
+fail:
+	ublk_put_req_ref(ubq, req);
+	return ERR_PTR(-EACCES);
+}
+
+static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	size_t ret;
+
+	req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST);
+	if (unlikely(IS_ERR(req)))
+		return PTR_ERR(req);
+
+	ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST);
+	ubq = req->mq_hctx->driver_data;
+	ublk_put_req_ref(ubq, req);
+
+	return ret;
+}
+
+static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	size_t ret;
+
+	req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE);
+	if (unlikely(IS_ERR(req)))
+		return PTR_ERR(req);
+
+	ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE);
+	ubq = req->mq_hctx->driver_data;
+	ublk_put_req_ref(ubq, req);
+
+	return ret;
+}
+
 static const struct file_operations ublk_ch_fops = {
 	.owner = THIS_MODULE,
 	.open = ublk_ch_open,
 	.release = ublk_ch_release,
 	.llseek = no_llseek,
+	.read_iter = ublk_ch_read_iter,
+	.write_iter = ublk_ch_write_iter,
 	.uring_cmd = ublk_ch_uring_cmd,
 	.mmap = ublk_ch_mmap,
 };
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index f6238ccc7800..d1a6b3dc0327 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -54,7 +54,36 @@
 #define UBLKSRV_IO_BUF_OFFSET	0x80000000
 
 /* tag bit is 12bit, so at most 4096 IOs for each queue */
-#define UBLK_MAX_QUEUE_DEPTH	4096
+#define UBLK_TAG_BITS		12
+#define UBLK_MAX_QUEUE_DEPTH	(1U << UBLK_TAG_BITS)
+
+/* used for locating each io buffer for pread()/pwrite() on char device */
+#define UBLK_BUFS_SIZE_BITS	42
+#define UBLK_BUFS_SIZE_MASK    ((1ULL << UBLK_BUFS_SIZE_BITS) - 1)
+#define UBLK_BUF_SIZE_BITS     (UBLK_BUFS_SIZE_BITS - UBLK_TAG_BITS)
+#define UBLK_BUF_MAX_SIZE      (1ULL << UBLK_BUF_SIZE_BITS)
+
+static inline __u16 ublk_pos_to_hwq(__u64 pos)
+{
+	return pos >> UBLK_BUFS_SIZE_BITS;
+}
+
+static inline __u32 ublk_pos_to_buf_offset(__u64 pos)
+{
+	return (pos & UBLK_BUFS_SIZE_MASK) & (UBLK_BUF_MAX_SIZE - 1);
+}
+
+static inline __u16 ublk_pos_to_tag(__u64 pos)
+{
+	return (pos & UBLK_BUFS_SIZE_MASK) >> UBLK_BUF_SIZE_BITS;
+}
+
+/* offset within a single io buffer has to be < UBLK_BUF_MAX_SIZE */
+static inline __u64 ublk_pos(__u16 q_id, __u16 tag, __u32 offset)
+{
+	return (((__u64)q_id) << UBLK_BUFS_SIZE_BITS) |
+		((((__u64)tag) << UBLK_BUF_SIZE_BITS) + offset);
+}
 
 /*
  * zero copy requires 4k block size, and can remap ublk driver's io
-- 
2.39.2


Thread overview: 28+ messages
     [not found] <CGME20230324135916epcas5p37aad4c49c76c05567a484377d8909092@epcas5p3.samsung.com>
2023-03-24 13:57 ` [PATCH V4 00/17] io_uring/ublk: add IORING_OP_FUSED_CMD Ming Lei
2023-03-24 13:57   ` [PATCH V4 01/17] io_uring: increase io_kiocb->flags into 64bit Ming Lei
2023-03-24 13:57   ` [PATCH V4 02/17] io_uring: add IORING_OP_FUSED_CMD Ming Lei
2023-03-24 13:57   ` [PATCH V4 03/17] io_uring: support normal SQE for fused command Ming Lei
2023-03-24 13:57   ` [PATCH V4 04/17] io_uring: support OP_READ/OP_WRITE for fused slave request Ming Lei
2023-03-24 13:57   ` [PATCH V4 05/17] io_uring: support OP_SEND_ZC/OP_RECV " Ming Lei
2023-03-24 13:57   ` [PATCH V4 06/17] block: ublk_drv: mark device as LIVE before adding disk Ming Lei
2023-03-24 13:57   ` [PATCH V4 07/17] block: ublk_drv: add common exit handling Ming Lei
2023-03-24 13:57   ` [PATCH V4 08/17] block: ublk_drv: don't consider flush request in map/unmap io Ming Lei
2023-03-24 13:58   ` [PATCH V4 09/17] block: ublk_drv: add two helpers to clean up map/unmap request Ming Lei
2023-03-24 13:58   ` [PATCH V4 10/17] block: ublk_drv: clean up several helpers Ming Lei
2023-03-24 13:58   ` [PATCH V4 11/17] block: ublk_drv: cleanup 'struct ublk_map_data' Ming Lei
2023-03-24 13:58   ` [PATCH V4 12/17] block: ublk_drv: cleanup ublk_copy_user_pages Ming Lei
2023-03-24 13:58   ` [PATCH V4 13/17] block: ublk_drv: grab request reference when the request is handled by userspace Ming Lei
2023-03-24 13:58   ` [PATCH V4 14/17] block: ublk_drv: support to copy any part of request pages Ming Lei
2023-03-24 13:58   ` Ming Lei [this message]
2023-03-24 13:58   ` [PATCH V4 16/17] block: ublk_drv: don't check buffer in case of zero copy Ming Lei
2023-03-24 13:58   ` [PATCH V4 17/17] block: ublk_drv: apply io_uring FUSED_CMD for supporting " Ming Lei
2023-03-28  0:36   ` [PATCH V4 00/17] io_uring/ublk: add IORING_OP_FUSED_CMD Dan Williams
2023-03-28  1:16     ` Ming Lei
2023-03-28  1:29       ` Jens Axboe
2023-03-28  1:35         ` Ming Lei
2023-03-28  1:31       ` Dan Williams
2023-03-28  2:02         ` Ming Lei
2023-03-28  6:32           ` Dan Williams
2023-03-28  3:13       ` Gao Xiang
2023-03-28  3:33         ` Ming Lei
2023-03-28  5:10   ` Kanchan Joshi
