All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Roesch <shr@fb.com>
To: <io-uring@vger.kernel.org>, <kernel-team@fb.com>,
	<linux-mm@kvack.org>, <linux-xfs@vger.kernel.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <shr@fb.com>, <david@fromorbit.com>
Subject: [RFC PATCH v1 15/18] mm: support write throttling for async buffered writes
Date: Tue, 26 Apr 2022 10:43:32 -0700	[thread overview]
Message-ID: <20220426174335.4004987-16-shr@fb.com> (raw)
In-Reply-To: <20220426174335.4004987-1-shr@fb.com>

This change adds support for async write throttling in the function
balance_dirty_pages(). So far if throttling was required, the code was
waiting synchronously as long as the writes were throttled. This change
introduces asynchronous throttling. Instead of waiting in the function
balance_dirty_pages(), the timeout is set in the task_struct field
bdp_pause. Once the timeout has expired, the writes are no longer
throttled.

- Add a new parameter to the balance_dirty_pages() function
  - This allows the caller to pass in the nowait flag
  - When the nowait flag is specified, the code does not wait in
    balance_dirty_pages(), but instead stores the wait expiration in the
    new task_struct field bdp_pause.

- The function balance_dirty_pages_ratelimited() resets the new values
  in the task_struct, once the timeout has expired

This change is required to support write throttling for the async
buffered writes. While the writes are throttled, io_uring still can make
progress with processing other requests.

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       | 54 ++++++++++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fec248ab1fec..48176a8047db 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -373,6 +373,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
 void wb_update_bandwidth(struct bdi_writeback *wb);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
+void  balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool is_async);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7e2da284e427..a62aa8a4c2f2 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1546,7 +1546,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
  * perform some writeout.
  */
 static void balance_dirty_pages(struct bdi_writeback *wb,
-				unsigned long pages_dirtied)
+				unsigned long pages_dirtied, bool is_async)
 {
 	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
 	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
@@ -1780,6 +1780,14 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 					  period,
 					  pause,
 					  start_time);
+		if (is_async) {
+			if (current->bdp_nr_dirtied_pause == -1) {
+				current->bdp_pause = now + pause;
+				current->bdp_nr_dirtied_pause = nr_dirtied_pause;
+			}
+			break;
+		}
+
 		__set_current_state(TASK_KILLABLE);
 		wb->dirty_sleep = now;
 		io_schedule_timeout(pause);
@@ -1787,6 +1795,8 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 		current->dirty_paused_when = now + pause;
 		current->nr_dirtied = 0;
 		current->nr_dirtied_pause = nr_dirtied_pause;
+		current->bdp_nr_dirtied_pause = -1;
+		current->bdp_pause = 0;
 
 		/*
 		 * This is typically equal to (dirty < thresh) and can also
@@ -1851,19 +1861,7 @@ static DEFINE_PER_CPU(int, bdp_ratelimits);
  */
 DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
 
-/**
- * balance_dirty_pages_ratelimited - balance dirty memory state
- * @mapping: address_space which was dirtied
- *
- * Processes which are dirtying memory should call in here once for each page
- * which was newly dirtied.  The function will periodically check the system's
- * dirty state and will initiate writeback if needed.
- *
- * Once we're over the dirty memory limit we decrease the ratelimiting
- * by a lot, to prevent individual processes from overshooting the limit
- * by (ratelimit_pages) each.
- */
-void balance_dirty_pages_ratelimited(struct address_space *mapping)
+void balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool is_async)
 {
 	struct inode *inode = mapping->host;
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -1874,6 +1872,15 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
 		return;
 
+	if (current->bdp_nr_dirtied_pause != -1 && time_after(jiffies, current->bdp_pause)) {
+		current->dirty_paused_when = current->bdp_pause;
+		current->nr_dirtied = 0;
+		current->nr_dirtied_pause = current->bdp_nr_dirtied_pause;
+
+		current->bdp_nr_dirtied_pause = -1;
+		current->bdp_pause = 0;
+	}
+
 	if (inode_cgwb_enabled(inode))
 		wb = wb_get_create_current(bdi, GFP_KERNEL);
 	if (!wb)
@@ -1912,10 +1919,27 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	preempt_enable();
 
 	if (unlikely(current->nr_dirtied >= ratelimit))
-		balance_dirty_pages(wb, current->nr_dirtied);
+		balance_dirty_pages(wb, current->nr_dirtied, is_async);
 
 	wb_put(wb);
 }
+
+/**
+ * balance_dirty_pages_ratelimited - balance dirty memory state
+ * @mapping: address_space which was dirtied
+ *
+ * Processes which are dirtying memory should call in here once for each page
+ * which was newly dirtied.  The function will periodically check the system's
+ * dirty state and will initiate writeback if needed.
+ *
+ * Once we're over the dirty memory limit we decrease the ratelimiting
+ * by a lot, to prevent individual processes from overshooting the limit
+ * by (ratelimit_pages) each.
+ */
+void balance_dirty_pages_ratelimited(struct address_space *mapping)
+{
+	balance_dirty_pages_ratelimited_flags(mapping, false);
+}
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
 
 /**
-- 
2.30.2


  parent reply	other threads:[~2022-04-26 17:44 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-26 17:43 [RFC PATCH v1 00/18] io-uring/xfs: support async buffered writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 01/18] block: add check for async buffered writes to generic_write_checks Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 02/18] mm: add FGP_ATOMIC flag to __filemap_get_folio() Stefan Roesch
2022-04-26 19:06   ` Matthew Wilcox
2022-04-28 19:54     ` Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 03/18] iomap: add iomap_page_create_gfp to allocate iomap_pages Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 04/18] iomap: use iomap_page_create_gfp() in __iomap_write_begin Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 05/18] iomap: add async buffered write support Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 06/18] xfs: add iomap " Stefan Roesch
2022-04-26 22:54   ` Dave Chinner
2022-04-28 20:03     ` Stefan Roesch
2022-04-28 21:44       ` Dave Chinner
2022-04-26 17:43 ` [RFC PATCH v1 07/18] fs: split off need_remove_file_privs() do_remove_file_privs() Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 08/18] fs: split off need_file_update_time and do_file_update_time Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 09/18] fs: add pending file update time flag Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 10/18] xfs: Enable async write file modification handling Stefan Roesch
2022-04-26 22:55   ` Dave Chinner
2022-04-27 12:07   ` Christian Brauner
2022-04-27 15:05   ` kernel test robot
2022-04-26 17:43 ` [RFC PATCH v1 11/18] xfs: add async buffered write support Stefan Roesch
2022-04-26 22:56   ` Dave Chinner
2022-04-28 19:58     ` Stefan Roesch
2022-04-28 21:54       ` Dave Chinner
2022-05-02 21:21         ` Stefan Roesch
2022-05-06  9:29           ` Dave Chinner
2022-05-09 19:32             ` Stefan Roesch
2022-05-09 23:24               ` Dave Chinner
2022-05-09 23:44                 ` Darrick J. Wong
2022-05-10  1:12                   ` Dave Chinner
2022-05-10  6:47                     ` Christoph Hellwig
2022-05-16  2:24                       ` Dave Chinner
2022-05-16 13:39                         ` Christoph Hellwig
2022-04-26 17:43 ` [RFC PATCH v1 12/18] io_uring: add support for async buffered writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 13/18] io_uring: add tracepoint for short writes Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 14/18] sched: add new fields to task_struct Stefan Roesch
2022-04-26 17:43 ` Stefan Roesch [this message]
2022-04-28 17:47   ` [RFC PATCH v1 15/18] mm: support write throttling for async buffered writes Jan Kara
2022-04-28 20:16     ` Stefan Roesch
2022-05-10  9:50       ` Jan Kara
2022-05-10 20:16         ` Stefan Roesch
2022-05-11 10:38           ` Jan Kara
2022-05-13 18:57             ` Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 16/18] iomap: User " Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 17/18] io_uring: support write " Stefan Roesch
2022-04-26 17:43 ` [RFC PATCH v1 18/18] xfs: enable async buffered write support Stefan Roesch
2022-04-26 22:37 ` [RFC PATCH v1 00/18] io-uring/xfs: support async buffered writes Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220426174335.4004987-16-shr@fb.com \
    --to=shr@fb.com \
    --cc=david@fromorbit.com \
    --cc=io-uring@vger.kernel.org \
    --cc=kernel-team@fb.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.