From: Andrei Vagin <avagin@openvz.org>
To: target-devel@vger.kernel.org
Subject: [PATCH v2] target/file: add support of direct and async I/O
Date: Thu, 22 Mar 2018 06:55:02 +0000
Message-ID: <20180322065502.25569-1-avagin@openvz.org>

There are two advantages:
* Direct I/O bypasses the write-back cache, which reduces the impact
  on other processes in the system.
* Async I/O allows handling several commands concurrently.

DIO + AIO shows better performance for random write operations:

Mode: O_DSYNC Async: 1
$ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sda --runtime=20 --numjobs=2
  WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), io=919MiB (963MB), run=20002-20020msec

Mode: O_DSYNC Async: 0
$ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sdb --runtime=20 --numjobs=2
  WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), io=31.8MiB (33.4MB), run=20280-20295msec
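
For reference, async I/O is enabled per backstore with the new
fd_async_io device parameter. A minimal configfs sketch (the backstore
name, backing file path and size below are illustrative, not taken from
this patch):

$ mkdir -p /sys/kernel/config/target/core/fileio_0/disk0
$ echo "fd_dev_name=/srv/disk0.img,fd_dev_size=4294967296,fd_async_io=1" \
    > /sys/kernel/config/target/core/fileio_0/disk0/control
$ echo 1 > /sys/kernel/config/target/core/fileio_0/disk0/enable
$ cat /sys/kernel/config/target/core/fileio_0/disk0/info
  # expected to include "Async: 1" in the Mode line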

Known issue:

DIF (PI) emulation doesn't work when a target uses async I/O, because
DIF metadata is saved in a separate file, and synchronizing writes to
two files so that a subsequent read always returns consistent metadata
for a given block is a non-trivial task.

v2: address review comments from Christoph Hellwig

Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 drivers/target/target_core_file.c | 137 ++++++++++++++++++++++++++++++++++----
 drivers/target/target_core_file.h |   1 +
 2 files changed, 124 insertions(+), 14 deletions(-)

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 9b2c0c773022..16751ae55d7b 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -250,6 +250,84 @@ static void fd_destroy_device(struct se_device *dev)
 	}
 }
 
+struct target_core_file_cmd {
+	unsigned long	len;
+	struct se_cmd	*cmd;
+	struct kiocb	iocb;
+};
+
+static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
+{
+	struct target_core_file_cmd *cmd;
+
+	cmd = container_of(iocb, struct target_core_file_cmd, iocb);
+
+	if (ret != cmd->len)
+		target_complete_cmd(cmd->cmd, SAM_STAT_CHECK_CONDITION);
+	else
+		target_complete_cmd(cmd->cmd, SAM_STAT_GOOD);
+
+	kfree(cmd);
+}
+
+static sense_reason_t
+fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+	      enum dma_data_direction data_direction)
+{
+	int is_write = !(data_direction == DMA_FROM_DEVICE);
+	struct se_device *dev = cmd->se_dev;
+	struct fd_dev *fd_dev = FD_DEV(dev);
+	struct file *file = fd_dev->fd_file;
+	struct target_core_file_cmd *aio_cmd;
+	struct iov_iter iter = {};
+	struct scatterlist *sg;
+	struct bio_vec *bvec;
+	ssize_t len = 0;
+	int ret = 0, i;
+
+	aio_cmd = kmalloc(sizeof(struct target_core_file_cmd), GFP_KERNEL);
+	if (!aio_cmd)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+	bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL);
+	if (!bvec) {
+		kfree(aio_cmd);
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	}
+
+	for_each_sg(sgl, sg, sgl_nents, i) {
+		bvec[i].bv_page = sg_page(sg);
+		bvec[i].bv_len = sg->length;
+		bvec[i].bv_offset = sg->offset;
+
+		len += sg->length;
+	}
+
+	iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+
+	aio_cmd->cmd = cmd;
+	aio_cmd->len = len;
+	aio_cmd->iocb.ki_pos = cmd->t_task_lba * dev->dev_attrib.block_size;
+	aio_cmd->iocb.ki_filp = file;
+	aio_cmd->iocb.ki_complete = cmd_rw_aio_complete;
+	aio_cmd->iocb.ki_flags = IOCB_DIRECT;
+
+	if (is_write && (cmd->se_cmd_flags & SCF_FUA))
+		aio_cmd->iocb.ki_flags |= IOCB_DSYNC;
+
+	if (is_write)
+		ret = call_write_iter(file, &aio_cmd->iocb, &iter);
+	else
+		ret = call_read_iter(file, &aio_cmd->iocb, &iter);
+
+	kfree(bvec);
+
+	if (ret != -EIOCBQUEUED)
+		cmd_rw_aio_complete(&aio_cmd->iocb, ret, 0);
+
+	return 0;
+}
+
 static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 		    u32 block_size, struct scatterlist *sgl,
 		    u32 sgl_nents, u32 data_length, int is_write)
@@ -527,7 +605,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 }
 
 static sense_reason_t
-fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+fd_execute_rw_buffered(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	      enum dma_data_direction data_direction)
 {
 	struct se_device *dev = cmd->se_dev;
@@ -537,16 +615,6 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	sense_reason_t rc;
 	int ret = 0;
 	/*
-	 * We are currently limited by the number of iovecs (2048) per
-	 * single vfs_[writev,readv] call.
-	 */
-	if (cmd->data_length > FD_MAX_BYTES) {
-		pr_err("FILEIO: Not able to process I/O of %u bytes due to"
-		       "FD_MAX_BYTES: %u iovec count limitation\n",
-			cmd->data_length, FD_MAX_BYTES);
-		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-	}
-	/*
 	 * Call vectorized fileio functions to map struct scatterlist
 	 * physical memory addresses to struct iovec virtual memory.
 	 */
@@ -620,14 +688,39 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 	return 0;
 }
 
+static sense_reason_t
+fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+	      enum dma_data_direction data_direction)
+{
+	struct se_device *dev = cmd->se_dev;
+	struct fd_dev *fd_dev = FD_DEV(dev);
+
+	/*
+	 * We are currently limited by the number of iovecs (2048) per
+	 * single vfs_[writev,readv] call.
+	 */
+	if (cmd->data_length > FD_MAX_BYTES) {
+		pr_err("FILEIO: Not able to process I/O of %u bytes due to"
+		       "FD_MAX_BYTES: %u iovec count limitation\n",
+			cmd->data_length, FD_MAX_BYTES);
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	}
+
+	if (fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO)
+		return fd_execute_rw_aio(cmd, sgl, sgl_nents, data_direction);
+	return fd_execute_rw_buffered(cmd, sgl, sgl_nents, data_direction);
+}
+
 enum {
-	Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io, Opt_err
+	Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io,
+	Opt_fd_async_io, Opt_err
 };
 
 static match_table_t tokens = {
 	{Opt_fd_dev_name, "fd_dev_name=%s"},
 	{Opt_fd_dev_size, "fd_dev_size=%s"},
 	{Opt_fd_buffered_io, "fd_buffered_io=%d"},
+	{Opt_fd_async_io, "fd_async_io=%d"},
 	{Opt_err, NULL}
 };
 
@@ -693,6 +786,21 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev,
 
 			fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE;
 			break;
+		case Opt_fd_async_io:
+			ret = match_int(args, &arg);
+			if (ret)
+				goto out;
+			if (arg != 1) {
+				pr_err("bogus fd_async_io=%d value\n", arg);
+				ret = -EINVAL;
+				goto out;
+			}
+
+			pr_debug("FILEIO: Using async I/O"
+				" operations for struct fd_dev\n");
+
+			fd_dev->fbd_flags |= FDBD_HAS_ASYNC_IO;
+			break;
 		default:
 			break;
 		}
@@ -709,10 +817,11 @@ static ssize_t fd_show_configfs_dev_params(struct se_device *dev, char *b)
 	ssize_t bl = 0;
 
 	bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
-	bl += sprintf(b + bl, "        File: %s  Size: %llu  Mode: %s\n",
+	bl += sprintf(b + bl, "        File: %s  Size: %llu  Mode: %s Async: %d\n",
 		fd_dev->fd_dev_name, fd_dev->fd_dev_size,
 		(fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ?
-		"Buffered-WCE" : "O_DSYNC");
+		"Buffered-WCE" : "O_DSYNC",
+		!!(fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO));
 	return bl;
 }
 
diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h
index 53be5ffd3261..929b1ecd544e 100644
--- a/drivers/target/target_core_file.h
+++ b/drivers/target/target_core_file.h
@@ -22,6 +22,7 @@
 #define FBDF_HAS_PATH		0x01
 #define FBDF_HAS_SIZE		0x02
 #define FDBD_HAS_BUFFERED_IO_WCE 0x04
+#define FDBD_HAS_ASYNC_IO	 0x08
 #define FDBD_FORMAT_UNIT_SIZE	2048
 
 struct fd_dev {
-- 
2.13.6

