All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kanchan Joshi <joshi.k@samsung.com>
To: axboe@kernel.dk, viro@zeniv.linux.org.uk, bcrl@kvack.org
Cc: willy@infradead.org, hch@infradead.org, Damien.LeMoal@wdc.com,
	asml.silence@gmail.com, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-aio@kvack.org,
	io-uring@vger.kernel.org, linux-block@vger.kernel.org,
	linux-api@vger.kernel.org,
	SelvaKumar S <selvakuma.s1@samsung.com>,
	Kanchan Joshi <joshi.k@samsung.com>,
	Nitesh Shetty <nj.shetty@samsung.com>,
	Javier Gonzalez <javier.gonz@samsung.com>
Subject: [PATCH v4 6/6] io_uring: add support for zone-append
Date: Fri, 24 Jul 2020 21:19:22 +0530	[thread overview]
Message-ID: <1595605762-17010-7-git-send-email-joshi.k@samsung.com> (raw)
In-Reply-To: <1595605762-17010-1-git-send-email-joshi.k@samsung.com>

From: SelvaKumar S <selvakuma.s1@samsung.com>

Repurpose [cqe->res, cqe->flags] into cqe->res64 (signed) to report
64bit written-offset for zone-append. The appending-write which requires
reporting written-location (conveyed by IOCB_ZONE_APPEND flag) is
ensured not to be a short-write; this avoids the need to report
number-of-bytes-copied.
append-offset is returned by lower-layer to io-uring via ret2 of
ki_complete interface. Make changes to collect it and send to user-space
via cqe->res64.

Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Javier Gonzalez <javier.gonz@samsung.com>
---
 fs/io_uring.c                 | 49 ++++++++++++++++++++++++++++++++++++-------
 include/uapi/linux/io_uring.h |  9 ++++++--
 2 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7809ab2..6510cf5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -401,7 +401,14 @@ struct io_rw {
 	/* NOTE: kiocb has the file as the first member, so don't do it here */
 	struct kiocb			kiocb;
 	u64				addr;
-	u64				len;
+	union {
+		/*
+		 * len is used only during submission.
+		 * append_offset is used only during completion.
+		 */
+		u64			len;
+		u64			append_offset;
+	};
 };
 
 struct io_connect {
@@ -541,6 +548,7 @@ enum {
 	REQ_F_NO_FILE_TABLE_BIT,
 	REQ_F_QUEUE_TIMEOUT_BIT,
 	REQ_F_WORK_INITIALIZED_BIT,
+	REQ_F_ZONE_APPEND_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -598,6 +606,8 @@ enum {
 	REQ_F_QUEUE_TIMEOUT	= BIT(REQ_F_QUEUE_TIMEOUT_BIT),
 	/* io_wq_work is initialized */
 	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
+	/* to return zone append offset */
+	REQ_F_ZONE_APPEND = BIT(REQ_F_ZONE_APPEND_BIT),
 };
 
 struct async_poll {
@@ -1244,8 +1254,15 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		req->flags &= ~REQ_F_OVERFLOW;
 		if (cqe) {
 			WRITE_ONCE(cqe->user_data, req->user_data);
-			WRITE_ONCE(cqe->res, req->result);
-			WRITE_ONCE(cqe->flags, req->cflags);
+			if (unlikely(req->flags & REQ_F_ZONE_APPEND)) {
+				if (likely(req->result > 0))
+					WRITE_ONCE(cqe->res64, req->rw.append_offset);
+				else
+					WRITE_ONCE(cqe->res64, req->result);
+			} else {
+				WRITE_ONCE(cqe->res, req->result);
+				WRITE_ONCE(cqe->flags, req->cflags);
+			}
 		} else {
 			WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1284,8 +1301,15 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 	cqe = io_get_cqring(ctx);
 	if (likely(cqe)) {
 		WRITE_ONCE(cqe->user_data, req->user_data);
-		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, cflags);
+		if (unlikely(req->flags & REQ_F_ZONE_APPEND)) {
+			if (likely(res > 0))
+				WRITE_ONCE(cqe->res64, req->rw.append_offset);
+			else
+				WRITE_ONCE(cqe->res64, res);
+		} else {
+			WRITE_ONCE(cqe->res, res);
+			WRITE_ONCE(cqe->flags, cflags);
+		}
 	} else if (ctx->cq_overflow_flushed) {
 		WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
@@ -1943,7 +1967,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
 		req->flags |= REQ_F_FAIL_LINK;
 }
 
-static void io_complete_rw_common(struct kiocb *kiocb, long res)
+static void io_complete_rw_common(struct kiocb *kiocb, long res, long long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 	int cflags = 0;
@@ -1955,6 +1979,9 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 		req_set_fail_links(req);
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_kbuf(req);
+	if (req->flags & REQ_F_ZONE_APPEND)
+		req->rw.append_offset = res2;
+
 	__io_cqring_add_event(req, res, cflags);
 }
 
@@ -1962,7 +1989,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-	io_complete_rw_common(kiocb, res);
+	io_complete_rw_common(kiocb, res, res2);
 	io_put_req(req);
 }
 
@@ -1976,8 +2003,11 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long long res2)
 	if (res != req->result)
 		req_set_fail_links(req);
 	req->result = res;
-	if (res != -EAGAIN)
+	if (res != -EAGAIN) {
+		if (req->flags & REQ_F_ZONE_APPEND)
+			req->rw.append_offset =  res2;
 		WRITE_ONCE(req->iopoll_completed, 1);
+	}
 }
 
 /*
@@ -2739,6 +2769,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 						SB_FREEZE_WRITE);
 		}
 		kiocb->ki_flags |= IOCB_WRITE;
+		/* zone-append requires few extra steps during completion */
+		if (kiocb->ki_flags & IOCB_ZONE_APPEND)
+			req->flags |= REQ_F_ZONE_APPEND;
 
 		if (!force_nonblock)
 			current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 92c2269..2580d93 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -156,8 +156,13 @@ enum {
  */
 struct io_uring_cqe {
 	__u64	user_data;	/* sqe->data submission passed back */
-	__s32	res;		/* result code for this event */
-	__u32	flags;
+	union {
+		struct {
+			__s32	res;	/* result code for this event */
+			__u32	flags;
+		};
+		__s64	res64;	/* appending offset for zone append */
+	};
 };
 
 /*
-- 
2.7.4


  parent reply	other threads:[~2020-07-24 16:24 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20200724155244epcas5p2902f57e36e490ee8772da19aa9408cdc@epcas5p2.samsung.com>
2020-07-24 15:49 ` [PATCH v4 0/6] zone-append support in io-uring and aio Kanchan Joshi
     [not found]   ` <CGME20200724155258epcas5p1a75b926950a18cd1e6c8e7a047e6c589@epcas5p1.samsung.com>
2020-07-24 15:49     ` [PATCH v4 1/6] fs: introduce FMODE_ZONE_APPEND and IOCB_ZONE_APPEND Kanchan Joshi
2020-07-24 16:34       ` Jens Axboe
2020-07-26 15:18       ` Christoph Hellwig
2020-07-28  1:49         ` Matthew Wilcox
2020-07-28  7:26           ` Christoph Hellwig
     [not found]   ` <CGME20200724155324epcas5p18e1d3b4402d1e4a8eca87d0b56a3fa9b@epcas5p1.samsung.com>
2020-07-24 15:49     ` [PATCH v4 2/6] fs: change ki_complete interface to support 64bit ret2 Kanchan Joshi
2020-07-26 15:18       ` Christoph Hellwig
     [not found]   ` <CGME20200724155329epcas5p345ba6bad0b8fe18056bb4bcd26c10019@epcas5p3.samsung.com>
2020-07-24 15:49     ` [PATCH v4 3/6] uio: return status with iov truncation Kanchan Joshi
     [not found]   ` <CGME20200724155341epcas5p15bfc55927f2abb60f19784270fe8e377@epcas5p1.samsung.com>
2020-07-24 15:49     ` [PATCH v4 4/6] block: add zone append handling for direct I/O path Kanchan Joshi
2020-07-26 15:19       ` Christoph Hellwig
     [not found]   ` <CGME20200724155346epcas5p2cfb383fe9904a45280c6145f4c13e1b4@epcas5p2.samsung.com>
2020-07-24 15:49     ` [PATCH v4 5/6] block: enable zone-append for iov_iter of bvec type Kanchan Joshi
2020-07-26 15:20       ` Christoph Hellwig
     [not found]   ` <CGME20200724155350epcas5p3b8f1d59eda7f8fbb38c828f692d42fd6@epcas5p3.samsung.com>
2020-07-24 15:49     ` Kanchan Joshi [this message]
2020-07-24 16:29       ` [PATCH v4 6/6] io_uring: add support for zone-append Jens Axboe
2020-07-27 19:16         ` Kanchan Joshi
2020-07-27 20:34           ` Jens Axboe
2020-07-30 16:08             ` Pavel Begunkov
2020-07-30 16:13               ` Jens Axboe
2020-07-30 16:26                 ` Pavel Begunkov
2020-07-30 17:16                   ` Jens Axboe
2020-07-30 17:38                     ` Pavel Begunkov
2020-07-30 17:51                       ` Kanchan Joshi
2020-07-30 17:54                         ` Jens Axboe
2020-07-30 18:25                           ` Kanchan Joshi
2020-07-31  6:42                             ` Damien Le Moal
2020-07-31  6:45                               ` hch
2020-07-31  6:59                                 ` Damien Le Moal
2020-07-31  7:58                                   ` Kanchan Joshi
2020-07-31  8:14                                     ` Damien Le Moal
2020-07-31  9:14                                       ` hch
2020-07-31  9:34                                         ` Damien Le Moal
2020-07-31  9:41                                           ` hch
2020-07-31 10:16                                             ` Damien Le Moal
2020-07-31 12:51                                               ` hch
2020-07-31 13:08                                                 ` hch
2020-07-31 15:07                                                   ` Kanchan Joshi
2022-03-02 20:47                                                   ` Luis Chamberlain
2020-08-05  7:35                                                 ` Damien Le Moal
2020-08-14  8:14                                                   ` hch
2020-08-14  8:27                                                     ` Damien Le Moal
2020-08-14 12:04                                                       ` hch
2020-08-14 12:20                                                         ` Damien Le Moal
2020-09-07  7:01                                                     ` Kanchan Joshi
2020-09-08 15:18                                                       ` hch
2020-09-24 17:19                                                         ` Kanchan Joshi
2020-09-25  2:52                                                           ` Damien Le Moal
2020-09-28 18:58                                                             ` Kanchan Joshi
2020-09-29  1:24                                                               ` Damien Le Moal
2020-09-29 18:49                                                                 ` Kanchan Joshi
2022-03-02 20:43                                                         ` Luis Chamberlain
2020-07-31  9:38                                       ` Kanchan Joshi
2022-03-02 20:51                                 ` Luis Chamberlain
2020-07-31  7:08                               ` Kanchan Joshi
2020-07-30 15:57       ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1595605762-17010-7-git-send-email-joshi.k@samsung.com \
    --to=joshi.k@samsung.com \
    --cc=Damien.LeMoal@wdc.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=bcrl@kvack.org \
    --cc=hch@infradead.org \
    --cc=io-uring@vger.kernel.org \
    --cc=javier.gonz@samsung.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nj.shetty@samsung.com \
    --cc=selvakuma.s1@samsung.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.