From: Stefan Roesch <shr@fb.com>
To: <io-uring@vger.kernel.org>, <kernel-team@fb.com>,
	<linux-mm@kvack.org>, <linux-xfs@vger.kernel.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <shr@fb.com>, <david@fromorbit.com>
Subject: [RFC PATCH v1 17/18] io_uring: support write throttling for async buffered writes
Date: Tue, 26 Apr 2022 10:43:34 -0700	[thread overview]
Message-ID: <20220426174335.4004987-18-shr@fb.com> (raw)
In-Reply-To: <20220426174335.4004987-1-shr@fb.com>

This adds process-level block-layer write throttling for async buffered
writes to io_uring. io_write() now checks whether the write needs to be
throttled. If so, it prepares the request for async execution, adds it to
the task's list of pending io requests, and starts a timer; when the
timer expires, the pending writes are resubmitted (see the userspace
sketch after the list below).

- Add a new list, pending_ios, to struct io_uring_task for delayed
  (throttled) writes. The list is protected by the task_lock spinlock.
- Add a new timer to struct io_uring_task; it is armed when the first
  pending write is queued.
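
For context, a minimal userspace sketch of the kind of request this path
serves. This is an illustration, not part of the patch: it assumes
liburing is available and a kernel with this series applied, and the file
name is arbitrary. Because the file is opened without O_DIRECT, the
kernel takes the buffered-write path that the throttling below applies to:

	/* Illustrative only: submit one async buffered write via io_uring. */
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <liburing.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		char buf[4096];
		int fd;

		memset(buf, 'a', sizeof(buf));

		if (io_uring_queue_init(8, &ring, 0))
			return 1;

		/* No O_DIRECT: a buffered write, the case being throttled. */
		fd = open("testfile", O_WRONLY | O_CREAT, 0644);
		if (fd < 0)
			return 1;

		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_write(sqe, fd, buf, sizeof(buf), 0);
		io_uring_submit(&ring);

		/* If the task is over its dirty-page quota, completion is
		 * simply delayed until the kernel-side timer resubmits the
		 * write. */
		if (!io_uring_wait_cqe(&ring, &cqe))
			io_uring_cqe_seen(&ring, cqe);

		io_uring_queue_exit(&ring);
		close(fd);
		return 0;
	}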

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ace3a5cdda68..4af654a82366 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -485,6 +485,11 @@ struct io_ring_ctx {
  */
 #define IO_RINGFD_REG_MAX 16
 
+struct pending_list {
+	struct list_head list;
+	struct io_kiocb *req;
+};
+
 struct io_uring_task {
 	/* submission side */
 	int			cached_refs;
@@ -501,6 +506,9 @@ struct io_uring_task {
 	struct callback_head	task_work;
 	struct file		**registered_rings;
 	bool			task_running;
+
+	struct pending_list	pending_ios;
+	struct timer_list	timer;
 };
 
 /*
@@ -1193,7 +1201,7 @@ static void io_rsrc_put_work(struct work_struct *work);
 
 static void io_req_task_queue(struct io_kiocb *req);
 static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
-static int io_req_prep_async(struct io_kiocb *req);
+static int io_req_prep_async(struct io_kiocb *req, bool force);
 
 static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 				 unsigned int issue_flags, u32 slot_index);
@@ -1201,6 +1209,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 static void io_eventfd_signal(struct io_ring_ctx *ctx);
+static void delayed_write_fn(struct timer_list *tmr);
 
 static struct kmem_cache *req_cachep;
 
@@ -2599,6 +2608,32 @@ static void io_req_task_queue_reissue(struct io_kiocb *req)
 	io_req_task_work_add(req, false);
 }
 
+static int io_req_task_queue_reissue_delayed(struct io_kiocb *req)
+{
+	struct io_uring_task *tctx = req->task->io_uring;
+	struct pending_list *pending;
+	bool empty;
+
+	pending = kmalloc(sizeof(struct pending_list), GFP_KERNEL);
+	if (!pending)
+		return -ENOMEM;
+	pending->req = req;
+
+	spin_lock_irq(&tctx->task_lock);
+	empty = list_empty(&tctx->pending_ios.list);
+	list_add_tail(&pending->list, &tctx->pending_ios.list);
+
+	if (empty) {
+		timer_setup(&tctx->timer, delayed_write_fn, 0);
+
+		tctx->timer.expires = current->bdp_pause;
+		add_timer(&tctx->timer);
+	}
+	spin_unlock_irq(&tctx->task_lock);
+
+	return 0;
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt = io_req_find_next(req);
@@ -2916,7 +2951,7 @@ static bool io_resubmit_prep(struct io_kiocb *req)
 	struct io_async_rw *rw = req->async_data;
 
 	if (!req_has_async_data(req))
-		return !io_req_prep_async(req);
+		return !io_req_prep_async(req, false);
 	iov_iter_restore(&rw->s.iter, &rw->s.iter_state);
 	return true;
 }
@@ -3938,6 +3973,38 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static inline unsigned long write_delay(void)
+{
+	if (likely(current->bdp_nr_dirtied_pause == -1 ||
+			!time_before(jiffies, current->bdp_pause)))
+		return 0;
+
+	return current->bdp_pause;
+}
+
+static void delayed_write_fn(struct timer_list *tmr)
+{
+	struct io_uring_task *tctx = from_timer(tctx, tmr, timer);
+	struct list_head *curr;
+	struct list_head *next;
+	LIST_HEAD(pending_ios);
+
+	/* Move list to temporary list. */
+	spin_lock_irq(&tctx->task_lock);
+	list_splice_init(&tctx->pending_ios.list, &pending_ios);
+	spin_unlock_irq(&tctx->task_lock);
+
+	list_for_each_safe(curr, next, &pending_ios) {
+		struct pending_list *io;
+
+		io = list_entry(curr, struct pending_list, list);
+		io_req_task_queue_reissue(io->req);
+
+		list_del(curr);
+		kfree(io);
+	}
+}
+
 static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3947,6 +4014,18 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	ssize_t ret, ret2;
 	loff_t *ppos;
 
+	/* Write throttling active? */
+	if (!(kiocb->ki_flags & IOCB_DIRECT) && unlikely(write_delay())) {
+		int ret = io_req_prep_async(req, true);
+
+		if (unlikely(ret))
+			io_req_complete_failed(req, ret);
+		else
+			ret = io_req_task_queue_reissue_delayed(req);
+
+		return ret;
+	}
+
 	if (!req_has_async_data(req)) {
 		ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags);
 		if (unlikely(ret < 0))
@@ -6962,9 +7041,9 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return -EINVAL;
 }
 
-static int io_req_prep_async(struct io_kiocb *req)
+static int io_req_prep_async(struct io_kiocb *req, bool force)
 {
-	if (!io_op_defs[req->opcode].needs_async_setup)
+	if (!force && !io_op_defs[req->opcode].needs_async_setup)
 		return 0;
 	if (WARN_ON_ONCE(req_has_async_data(req)))
 		return -EFAULT;
@@ -6974,6 +7053,10 @@ static int io_req_prep_async(struct io_kiocb *req)
 	switch (req->opcode) {
 	case IORING_OP_READV:
 		return io_rw_prep_async(req, READ);
+	case IORING_OP_WRITE:
+		if (!force)
+			break;
+		fallthrough;
 	case IORING_OP_WRITEV:
 		return io_rw_prep_async(req, WRITE);
 	case IORING_OP_SENDMSG:
@@ -6983,6 +7066,7 @@ static int io_req_prep_async(struct io_kiocb *req)
 	case IORING_OP_CONNECT:
 		return io_connect_prep_async(req);
 	}
+
 	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
 		    req->opcode);
 	return -EFAULT;
@@ -7016,7 +7100,7 @@ static __cold void io_drain_req(struct io_kiocb *req)
 	}
 	spin_unlock(&ctx->completion_lock);
 
-	ret = io_req_prep_async(req);
+	ret = io_req_prep_async(req, false);
 	if (ret) {
 fail:
 		io_req_complete_failed(req, ret);
@@ -7550,7 +7634,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
 	} else if (unlikely(req->ctx->drain_active)) {
 		io_drain_req(req);
 	} else {
-		int ret = io_req_prep_async(req);
+		int ret = io_req_prep_async(req, false);
 
 		if (unlikely(ret))
 			io_req_complete_failed(req, ret);
@@ -7746,7 +7830,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		struct io_kiocb *head = link->head;
 
 		if (!(req->flags & REQ_F_FAIL)) {
-			ret = io_req_prep_async(req);
+			ret = io_req_prep_async(req, false);
 			if (unlikely(ret)) {
 				req_fail_link_node(req, ret);
 				if (!(head->flags & REQ_F_FAIL))
@@ -9222,6 +9306,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
 	INIT_WQ_LIST(&tctx->task_list);
 	INIT_WQ_LIST(&tctx->prior_task_list);
 	init_task_work(&tctx->task_work, tctx_task_work);
+	INIT_LIST_HEAD(&tctx->pending_ios.list);
 	return 0;
 }
 
-- 
2.30.2
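
[Editorial note] For readers less familiar with the timer idiom that
delayed_write_fn() relies on: timer_setup() binds a callback to a
timer_list embedded in a larger structure, and the callback recovers its
container with from_timer(), a container_of() wrapper. A stripped-down
sketch of the idiom follows; struct my_ctx and the one-second expiry are
illustrative, not from the patch:

	#include <linux/timer.h>
	#include <linux/jiffies.h>
	#include <linux/printk.h>

	struct my_ctx {
		struct timer_list timer;	/* embedded, as in io_uring_task */
		int value;
	};

	static void my_timer_fn(struct timer_list *tmr)
	{
		/* from_timer() maps the timer_list back to its container. */
		struct my_ctx *ctx = from_timer(ctx, tmr, timer);

		pr_info("timer fired, value=%d\n", ctx->value);
	}

	static void my_start(struct my_ctx *ctx)
	{
		timer_setup(&ctx->timer, my_timer_fn, 0);
		ctx->timer.expires = jiffies + HZ;	/* illustrative: 1s */
		add_timer(&ctx->timer);
	}

This is also why io_req_task_queue_reissue_delayed() arms the timer only
when the pending list transitions from empty to non-empty: one timer per
task covers the whole batch, and delayed_write_fn() drains the list in a
single pass.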


Thread overview: 46+ messages
2022-04-26 17:43 [RFC PATCH v1 00/18] io-uring/xfs: support async buffered writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 01/18] block: add check for async buffered writes to generic_write_checks Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 02/18] mm: add FGP_ATOMIC flag to __filemap_get_folio() Stefan Roesch
2022-04-26 19:06   ` Matthew Wilcox
2022-04-28 19:54     ` Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 03/18] iomap: add iomap_page_create_gfp to allocate iomap_pages Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 04/18] iomap: use iomap_page_create_gfp() in __iomap_write_begin Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 05/18] iomap: add async buffered write support Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 06/18] xfs: add iomap " Stefan Roesch
2022-04-26 22:54   ` Dave Chinner
2022-04-28 20:03     ` Stefan Roesch
2022-04-28 21:44       ` Dave Chinner
2022-04-26 17:43 ` [RFC PATCH v1 07/18] fs: split off need_remove_file_privs() do_remove_file_privs() Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 08/18] fs: split off need_file_update_time and do_file_update_time Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 09/18] fs: add pending file update time flag Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 10/18] xfs: Enable async write file modification handling Stefan Roesch
2022-04-26 22:55   ` Dave Chinner
2022-04-27 12:07   ` Christian Brauner
2022-04-27 15:05   ` kernel test robot
2022-04-26 17:43 ` [RFC PATCH v1 11/18] xfs: add async buffered write support Stefan Roesch
2022-04-26 22:56   ` Dave Chinner
2022-04-28 19:58     ` Stefan Roesch
2022-04-28 21:54       ` Dave Chinner
2022-05-02 21:21         ` Stefan Roesch
2022-05-06  9:29           ` Dave Chinner
2022-05-09 19:32             ` Stefan Roesch
2022-05-09 23:24               ` Dave Chinner
2022-05-09 23:44                 ` Darrick J. Wong
2022-05-10  1:12                   ` Dave Chinner
2022-05-10  6:47                     ` Christoph Hellwig
2022-05-16  2:24                       ` Dave Chinner
2022-05-16 13:39                         ` Christoph Hellwig
2022-04-26 17:43 ` [RFC PATCH v1 12/18] io_uring: add support for async buffered writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 13/18] io_uring: add tracepoint for short writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 14/18] sched: add new fields to task_struct Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 15/18] mm: support write throttling for async buffered writes Stefan Roesch
2022-04-28 17:47   ` Jan Kara
2022-04-28 20:16     ` Stefan Roesch
2022-05-10  9:50       ` Jan Kara
2022-05-10 20:16         ` Stefan Roesch
2022-05-11 10:38           ` Jan Kara
2022-05-13 18:57             ` Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 16/18] iomap: User " Stefan Roesch
2022-04-26 17:43 ` Stefan Roesch [this message]
2022-04-26 17:43 ` [RFC PATCH v1 18/18] xfs: enable async buffered write support Stefan Roesch
2022-04-26 22:37 ` [RFC PATCH v1 00/18] io-uring/xfs: support async buffered writes Dave Chinner
