All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>,
	linux-block <linux-block@vger.kernel.org>,
	Damien Le Moal <Damien.LeMoal@wdc.com>,
	Keith Busch <kbusch@kernel.org>,
	"linux-scsi @ vger . kernel . org" <linux-scsi@vger.kernel.org>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	"linux-fsdevel @ vger . kernel . org"
	<linux-fsdevel@vger.kernel.org>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH v3 10/10] zonefs: use REQ_OP_ZONE_APPEND for sync DIO
Date: Sat, 28 Mar 2020 01:50:12 +0900	[thread overview]
Message-ID: <20200327165012.34443-11-johannes.thumshirn@wdc.com> (raw)
In-Reply-To: <20200327165012.34443-1-johannes.thumshirn@wdc.com>

Synchronous direct I/O to a sequential write only zone can be issued using
the new REQ_OP_ZONE_APPEND request operation. As dispatching multiple
BIOs can potentially result in reordering, we cannot support asynchronous
IO via this interface.

We also can only dispatch up to queue_max_zone_append_sectors() via the
new zone-append method and have to return a short write back to user-space
in case an IO larger than queue_max_zone_append_sectors() has been issued.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/zonefs/super.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 69aee3dfb660..b5432861d62e 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -20,6 +20,7 @@
 #include <linux/mman.h>
 #include <linux/sched/mm.h>
 #include <linux/crc32.h>
+#include <linux/task_io_accounting_ops.h>
 
 #include "zonefs.h"
 
@@ -582,6 +583,89 @@ static const struct iomap_dio_ops zonefs_write_dio_ops = {
 	.end_io			= zonefs_file_write_dio_end_io,
 };
 
+static void zonefs_zone_append_bio_endio(struct bio *bio)
+{
+	struct task_struct *waiter = bio->bi_private;
+
+	WRITE_ONCE(bio->bi_private, NULL);
+	blk_wake_io_task(waiter);
+
+	bio_release_pages(bio, false);
+	bio_put(bio);
+}
+
+static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	ssize_t ret = 0;
+	ssize_t size;
+	struct bio *bio;
+	unsigned max;
+	int nr_pages;
+	blk_qc_t qc;
+
+	nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+	if (!nr_pages)
+		return 0;
+
+	max = queue_max_zone_append_sectors(bdev_get_queue(bdev)) << 9;
+	max = ALIGN_DOWN(max, inode->i_sb->s_blocksize);
+	iov_iter_truncate(from, max);
+
+	bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
+	if (!bio)
+		return -ENOMEM;
+
+	bio_set_dev(bio, bdev);
+	bio->bi_iter.bi_sector = zi->i_zsector;
+	bio->bi_write_hint = iocb->ki_hint;
+	bio->bi_private = current;
+	bio->bi_end_io = zonefs_zone_append_bio_endio;
+	bio->bi_ioprio = iocb->ki_ioprio;
+	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+	if (iocb->ki_flags & IOCB_DSYNC)
+		bio->bi_opf |= REQ_FUA;
+
+	ret = bio_iov_iter_get_pages(bio, from);
+	if (unlikely(ret)) {
+		bio->bi_status = BLK_STS_IOERR;
+		bio_endio(bio);
+		return ret;
+	}
+	size = bio->bi_iter.bi_size;
+	task_io_account_write(ret);
+
+	if (iocb->ki_flags & IOCB_HIPRI)
+		bio_set_polled(bio, iocb);
+
+	bio_get(bio);
+	qc = submit_bio(bio);
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio->bi_private))
+			break;
+		if (!(iocb->ki_flags & IOCB_HIPRI) ||
+		    !blk_poll(bdev_get_queue(bdev), qc, true))
+			io_schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	if (unlikely(bio->bi_status))
+		ret = blk_status_to_errno(bio->bi_status);
+
+	bio_put(bio);
+
+	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+	if (ret >= 0) {
+		iocb->ki_pos += size;
+		return size;
+	}
+
+	return ret;
+}
+
 /*
  * Handle direct writes. For sequential zone files, this is the only possible
  * write path. For these files, check that the user is issuing writes
@@ -599,6 +683,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 	struct super_block *sb = inode->i_sb;
 	size_t count;
 	ssize_t ret;
+	bool sync = is_sync_kiocb(iocb);
 
 	/*
 	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
@@ -637,8 +722,11 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 	}
 	mutex_unlock(&zi->i_truncate_mutex);
 
-	ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
-			   &zonefs_write_dio_ops, is_sync_kiocb(iocb));
+	if (sync && zi->i_ztype == ZONEFS_ZTYPE_SEQ)
+		ret = zonefs_file_dio_append(iocb, from);
+	else
+		ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+				   &zonefs_write_dio_ops, sync);
 	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
 	    (ret > 0 || ret == -EIOCBQUEUED)) {
 		if (ret > 0)
-- 
2.24.1


  parent reply	other threads:[~2020-03-27 16:50 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-27 16:50 [PATCH v3 00/10] Introduce Zone Append for writing to zoned block devices Johannes Thumshirn
2020-03-27 16:50 ` [PATCH v3 01/10] block: provide fallbacks for blk_queue_zone_is_seq and blk_queue_zone_no Johannes Thumshirn
2020-03-27 17:10   ` Christoph Hellwig
2020-03-27 16:50 ` [PATCH v3 02/10] block: Introduce REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-03-27 17:19   ` Christoph Hellwig
2020-03-31 15:23   ` Keith Busch
2020-03-31 15:35     ` Johannes Thumshirn
2020-03-27 16:50 ` [PATCH v3 03/10] block: introduce blk_req_zone_write_trylock Johannes Thumshirn
2020-03-27 17:19   ` Christoph Hellwig
2020-03-27 16:50 ` [PATCH v3 04/10] block: Introduce zone write pointer offset caching Johannes Thumshirn
2020-03-27 17:21   ` Christoph Hellwig
2020-03-27 16:50 ` [PATCH v3 05/10] scsi: sd_zbc: factor out sanity checks for zoned commands Johannes Thumshirn
2020-03-27 17:21   ` Christoph Hellwig
2020-03-27 16:50 ` [PATCH v3 06/10] scsi: sd_zbc: emulate ZONE_APPEND commands Johannes Thumshirn
2020-03-28  8:51   ` Christoph Hellwig
2020-03-28  9:02     ` Damien Le Moal
2020-03-28  9:07       ` hch
2020-03-28  9:18         ` Damien Le Moal
2020-03-28  9:21           ` hch
2020-03-27 16:50 ` [PATCH v3 07/10] null_blk: Cleanup zoned device initialization Johannes Thumshirn
2020-03-27 17:23   ` Christoph Hellwig
2020-03-27 16:50 ` [PATCH v3 08/10] null_blk: Support REQ_OP_ZONE_APPEND Johannes Thumshirn
2020-03-27 17:26   ` Christoph Hellwig
2020-03-28  8:51     ` Damien Le Moal
2020-03-28 14:17       ` Johannes Thumshirn
2020-03-27 16:50 ` [PATCH v3 09/10] block: export bio_release_pages and bio_iov_iter_get_pages Johannes Thumshirn
2020-03-27 17:07   ` Christoph Hellwig
2020-03-27 17:13     ` Johannes Thumshirn
2020-03-27 17:22       ` hch
2020-03-27 16:50 ` Johannes Thumshirn [this message]
2020-03-27 17:10   ` [PATCH v3 10/10] zonefs: use REQ_OP_ZONE_APPEND for sync DIO Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200327165012.34443-11-johannes.thumshirn@wdc.com \
    --to=johannes.thumshirn@wdc.com \
    --cc=Damien.LeMoal@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=hch@infradead.org \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.