From: Jens Axboe <axboe@kernel.dk>
To: linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>
Subject: [PATCH] io_uring: add support for barrier fsync
Date: Tue, 9 Apr 2019 10:27:43 -0600
Message-ID: <7c7276e4-8ffa-495a-6abf-926a58ee899e@kernel.dk>

It's a common use case to issue a batch of writes, then an fsync or
fdatasync once they complete. Since io_uring doesn't guarantee any
ordering between requests, the application must track the issued writes
and hold back the fsync submission until all of them have completed.
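
For reference, here's roughly what that manual tracking looks like with
liburing (a sketch only: fd, nr_writes, iovecs, and offsets are
hypothetical placeholders, and error handling is elided):

	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i;

	io_uring_queue_init(nr_writes + 1, &ring, 0);

	/* queue and submit the batch of writes */
	for (i = 0; i < nr_writes; i++) {
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_writev(sqe, fd, &iovecs[i], 1, offsets[i]);
	}
	io_uring_submit(&ring);

	/* reap every write completion before the fsync may be issued */
	for (i = 0; i < nr_writes; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
	}

	/* only now is it safe to submit the fsync */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_fsync(sqe, fd, 0);
	io_uring_submit(&ring);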

Add an IORING_FSYNC_BARRIER flag so the application doesn't have to do
this tracking manually. If this flag is set on the fsync request, we
won't issue it until all pending IO has completed.
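
With the barrier flag, the writes and the fsync can instead go into a
single submission (same hypothetical placeholders as above; assumes
liburing's io_uring_prep_fsync() passes the flags through to
sqe->fsync_flags):

	/* queue the writes */
	for (i = 0; i < nr_writes; i++) {
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_writev(sqe, fd, &iovecs[i], 1, offsets[i]);
	}

	/* queue the fsync in the same batch; the kernel defers it
	 * until the previously submitted IO has completed */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_fsync(sqe, fd, IORING_FSYNC_BARRIER);

	io_uring_submit(&ring);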

Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 07d6ef195d05..08f1e5766554 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -172,6 +172,7 @@ struct io_ring_ctx {
 		 */
 		struct list_head	poll_list;
 		struct list_head	cancel_list;
+		struct list_head	fsync_list;
 	} ____cacheline_aligned_in_smp;
 
 	struct async_list	pending_async[2];
@@ -202,6 +203,11 @@ struct io_poll_iocb {
 	struct wait_queue_entry		wait;
 };
 
+struct io_fsync_iocb {
+	struct file			*file;
+	unsigned			sequence;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -213,6 +219,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
+		struct io_fsync_iocb	fsync;
 	};
 
 	struct sqe_submit	submit;
@@ -255,6 +262,8 @@ struct io_submit_state {
 	unsigned int		ios_left;
 };
 
+static void io_sq_wq_submit_work(struct work_struct *work);
+
 static struct kmem_cache *req_cachep;
 
 static const struct file_operations io_uring_fops;
@@ -306,10 +315,32 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	spin_lock_init(&ctx->completion_lock);
 	INIT_LIST_HEAD(&ctx->poll_list);
 	INIT_LIST_HEAD(&ctx->cancel_list);
+	INIT_LIST_HEAD(&ctx->fsync_list);
 	return ctx;
 }
 
-static void io_commit_cqring(struct io_ring_ctx *ctx)
+static inline bool io_sequence_defer(struct io_ring_ctx *ctx, unsigned seq)
+{
+	return seq > ctx->cached_cq_tail + ctx->sq_ring->dropped;
+}
+
+static struct io_kiocb *io_get_ready_fsync(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	if (list_empty(&ctx->fsync_list))
+		return NULL;
+
+	req = list_first_entry(&ctx->fsync_list, struct io_kiocb, list);
+	if (!io_sequence_defer(ctx, req->fsync.sequence)) {
+		list_del_init(&req->list);
+		return req;
+	}
+
+	return NULL;
+}
+
+static void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 
@@ -330,6 +361,16 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
 	}
 }
 
+static void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	__io_commit_cqring(ctx);
+
+	while ((req = io_get_ready_fsync(ctx)) != NULL)
+		queue_work(ctx->sqo_wq, &req->work);
+}
+
 static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
@@ -1073,9 +1114,39 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	return 0;
 }
 
-static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_fsync_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_uring_sqe *sqe_copy;
+
+	if (!io_sequence_defer(ctx, req->fsync.sequence))
+		return 0;
+
+	sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+	if (!sqe_copy)
+		return -EAGAIN;
+
+	spin_lock_irq(&ctx->completion_lock);
+	if (!io_sequence_defer(ctx, req->fsync.sequence)) {
+		spin_unlock_irq(&ctx->completion_lock);
+		kfree(sqe_copy);
+		return 0;
+	}
+
+	memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+	req->submit.sqe = sqe_copy;
+
+	INIT_WORK(&req->work, io_sq_wq_submit_work);
+	list_add_tail(&req->list, &ctx->fsync_list);
+	spin_unlock_irq(&ctx->completion_lock);
+	return -EIOCBQUEUED;
+}
+
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			 unsigned fsync_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	int ret = 0;
 
 	if (!req->file)
 		return -EBADF;
@@ -1088,8 +1159,13 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
+	if (fsync_flags & IORING_FSYNC_BARRIER) {
+		req->fsync.sequence = ctx->cached_sq_head - 1;
+		ret = io_fsync_defer(req, sqe);
+	}
+
 	req->flags |= REQ_F_PREPPED;
-	return 0;
+	return ret;
 }
 
 static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
@@ -1102,12 +1178,15 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	int ret;
 
 	fsync_flags = READ_ONCE(sqe->fsync_flags);
-	if (unlikely(fsync_flags & ~IORING_FSYNC_DATASYNC))
+	if (unlikely(fsync_flags & ~(IORING_FSYNC_DATASYNC|IORING_FSYNC_BARRIER)))
 		return -EINVAL;
 
-	ret = io_prep_fsync(req, sqe);
-	if (ret)
+	ret = io_prep_fsync(req, sqe, fsync_flags);
+	if (ret) {
+		if (ret == -EIOCBQUEUED)
+			return 0;
 		return ret;
+	}
 
 	/* fsync always requires a blocking context */
 	if (force_nonblock)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e23408692118..57b8f4d57af6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -59,6 +59,7 @@ struct io_uring_sqe {
  * sqe->fsync_flags
  */
 #define IORING_FSYNC_DATASYNC	(1U << 0)
+#define IORING_FSYNC_BARRIER	(1U << 1)
 
 /*
  * IO completion data structure (Completion Queue Entry)

-- 
Jens Axboe


Thread overview: 7+ messages
2019-04-09 16:27 Jens Axboe [this message]
2019-04-09 18:17 ` [PATCH] io_uring: add support for barrier fsync Christoph Hellwig
2019-04-09 18:23   ` Jens Axboe
2019-04-09 18:42     ` Chris Mason
2019-04-09 18:46       ` Jens Axboe
2019-04-09 18:56         ` Chris Mason
2019-04-11 11:05         ` Dave Chinner
