All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xie Yongmei <yongmeixie@hotmail.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org
Cc: yongmeixie@hotmail.comc, Xie Yongmei <yongmeixie@hotmail.com>
Subject: [PATCH 3/3] writeback: specify writeback period and expire interval per memcg
Date: Wed, 27 Apr 2022 05:32:41 -0400	[thread overview]
Message-ID: <TYYP286MB1115331A1F4852D7CA3E86A2C5FA9@TYYP286MB1115.JPNP286.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <20220427093241.108281-1-yongmeixie@hotmail.com>

dirty_writeback_interval: dirty wakeup period
dirty_expire_interval: expire period

This patch provides per-memcg settings for both intervals.

Dirty writeback can be triggered in the following ways:
  - mark_inode_dirty: the first time pages are dirtied for an inode,
		it schedules the callback hook wb_workfn to run one
		wakeup period later.
  - wb_workfn: if there is more writeback work to do, it schedules the
		callback hook wb_workfn to run another wakeup period later.
  - external event: kswapd found dirty pages piled up at the end of the
		inactive list, or the laptop mode timer fired.
  - buffered write context: balance_dirty_pages tries to wake up background
		writeback once dirty pages exceed the freerun level.
  - sync context: sync (fs sync) starts writeback immediately.

No matter how writeback is triggered, wb_workfn is the single callback hook
that performs the flushing. Specifically, wb_check_old_data_flush handles
periodic writeback and decides which dirty pages have to be written back
because they are too old.

Signed-off-by: Xie Yongmei <yongmeixie@hotmail.com>
---
 fs/fs-writeback.c          |  11 ++--
 include/linux/memcontrol.h |  16 ++++++
 mm/backing-dev.c           |   4 +-
 mm/memcontrol.c            | 114 +++++++++++++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 5 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 591fe9cf1659..f59e4709ec39 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1980,6 +1980,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	struct inode *inode;
 	long progress;
 	struct blk_plug plug;
+	unsigned int dirty_expire = wb_dirty_expire_interval(wb);
 
 	blk_start_plug(&plug);
 	spin_lock(&wb->list_lock);
@@ -2015,7 +2016,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 */
 		if (work->for_kupdate) {
 			dirtied_before = jiffies -
-				msecs_to_jiffies(dirty_expire_interval * 10);
+				msecs_to_jiffies(dirty_expire * 10);
 		} else if (work->for_background)
 			dirtied_before = jiffies;
 
@@ -2101,15 +2102,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
 	unsigned long expired;
 	long nr_pages;
+	unsigned int writeback_interval = wb_dirty_writeback_interval(wb);
 
 	/*
 	 * When set to zero, disable periodic writeback
 	 */
-	if (!dirty_writeback_interval)
+	if (!writeback_interval)
 		return 0;
 
 	expired = wb->last_old_flush +
-			msecs_to_jiffies(dirty_writeback_interval * 10);
+			msecs_to_jiffies(writeback_interval * 10);
 	if (time_before(jiffies, expired))
 		return 0;
 
@@ -2194,6 +2196,7 @@ void wb_workfn(struct work_struct *work)
 	struct bdi_writeback *wb = container_of(to_delayed_work(work),
 						struct bdi_writeback, dwork);
 	long pages_written;
+	unsigned int writeback_interval = wb_dirty_writeback_interval(wb);
 
 	set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
 
@@ -2222,7 +2225,7 @@ void wb_workfn(struct work_struct *work)
 
 	if (!list_empty(&wb->work_list))
 		wb_wakeup(wb);
-	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+	else if (wb_has_dirty_io(wb) && writeback_interval)
 		wb_wakeup_delayed(wb);
 }
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 386fc9b70c95..c1dc88bb8f80 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -347,6 +347,8 @@ struct mem_cgroup {
 #ifdef CONFIG_CGROUP_WRITEBACK_PARA
 	int dirty_background_ratio;
 	int dirty_ratio;
+	int dirty_writeback_interval;
+	int dirty_expire_interval;
 #endif
 
 	struct mem_cgroup_per_node *nodeinfo[];
@@ -1642,6 +1644,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 #ifdef CONFIG_CGROUP_WRITEBACK_PARA
 unsigned int wb_dirty_background_ratio(struct bdi_writeback *wb);
 unsigned int wb_dirty_ratio(struct bdi_writeback *wb);
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb);
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb);
 #else
 static inline
 unsigned int wb_dirty_background_ratio(struct bdi_writeback *wb)
@@ -1654,6 +1658,18 @@ unsigned int wb_dirty_ratio(struct bdi_writeback *wb)
 {
 	return vm_dirty_ratio;
 }
+
+static inline
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb)
+{
+	return dirty_writeback_interval;
+}
+
+static inline
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb)
+{
+	return dirty_expire_interval;
+}
 #endif
 
 struct sock;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7176af65b103..685558362ad8 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -15,6 +15,7 @@
 #include <linux/writeback.h>
 #include <linux/device.h>
 #include <trace/events/writeback.h>
+#include <linux/memcontrol.h>
 
 struct backing_dev_info noop_backing_dev_info;
 EXPORT_SYMBOL_GPL(noop_backing_dev_info);
@@ -264,8 +265,9 @@ subsys_initcall(default_bdi_init);
 void wb_wakeup_delayed(struct bdi_writeback *wb)
 {
 	unsigned long timeout;
+	unsigned int dirty_interval = wb_dirty_writeback_interval(wb);
 
-	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+	timeout = msecs_to_jiffies(dirty_interval * 10);
 	spin_lock_bh(&wb->work_lock);
 	if (test_bit(WB_registered, &wb->state))
 		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b1c1b150637a..c392aec22e2e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4851,17 +4851,49 @@ unsigned int wb_dirty_ratio(struct bdi_writeback *wb)
 	return memcg->dirty_ratio;
 }
 
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled() || !wb)
+		return dirty_writeback_interval;
+
+	memcg = mem_cgroup_from_css(wb->memcg_css);
+	if (memcg == root_mem_cgroup || memcg->dirty_writeback_interval < 0)
+		return dirty_writeback_interval;
+
+	return memcg->dirty_writeback_interval;
+}
+
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled() || !wb)
+		return dirty_expire_interval;
+
+	memcg = mem_cgroup_from_css(wb->memcg_css);
+	if (memcg == root_mem_cgroup || memcg->dirty_expire_interval < 0)
+		return dirty_expire_interval;
+
+	return memcg->dirty_expire_interval;
+}
+
 static void wb_memcg_inherit_from_parent(struct mem_cgroup *parent,
 					 struct mem_cgroup *memcg)
 {
 	memcg->dirty_background_ratio = parent->dirty_background_ratio;
 	memcg->dirty_ratio = parent->dirty_ratio;
+	memcg->dirty_writeback_interval = parent->dirty_writeback_interval;
+	memcg->dirty_expire_interval = parent->dirty_expire_interval;
 }
 
 static void wb_memcg_init(struct mem_cgroup *memcg)
 {
 	memcg->dirty_background_ratio = -1;
 	memcg->dirty_ratio = -1;
+	memcg->dirty_writeback_interval = -1;
+	memcg->dirty_expire_interval = -1;
 }
 
 static int mem_cgroup_dirty_background_ratio_show(struct seq_file *m, void *v)
@@ -4918,6 +4950,64 @@ mem_cgroup_dirty_ratio_write(struct kernfs_open_file *of,
 	memcg->dirty_ratio = dirty_ratio;
 	return nbytes;
 }
+
+static int mem_cgroup_dirty_writeback_interval_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->dirty_writeback_interval);
+	return 0;
+}
+
+static ssize_t
+mem_cgroup_dirty_writeback_interval_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes,
+					  loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, writeback_interval;
+
+	buf = strstrip(buf);
+	ret = kstrtoint(buf, 0, &writeback_interval);
+	if (ret)
+		return ret;
+
+	if (writeback_interval < -1)
+		return -EINVAL;
+
+	if (memcg->dirty_writeback_interval != writeback_interval) {
+		memcg->dirty_writeback_interval = writeback_interval;
+		wakeup_flusher_threads(WB_REASON_PERIODIC);
+	}
+	return nbytes;
+}
+
+static int mem_cgroup_dirty_expire_interval_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->dirty_expire_interval);
+	return 0;
+}
+
+static ssize_t
+mem_cgroup_dirty_expire_interval_write(struct kernfs_open_file *of,
+				       char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, expire_interval;
+
+	buf = strstrip(buf);
+	ret = kstrtoint(buf, 0, &expire_interval);
+	if (ret)
+		return ret;
+
+	if (expire_interval < -1)
+		return -EINVAL;
+
+	memcg->dirty_expire_interval = expire_interval;
+	return nbytes;
+}
 #else
 static void wb_memcg_inherit_from_parent(struct mem_cgroup *parent,
 					 struct mem_cgroup *memcg)
@@ -5067,6 +5157,18 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.seq_show = mem_cgroup_dirty_ratio_show,
 		.write = mem_cgroup_dirty_ratio_write,
 	},
+	{
+		.name = "dirty_writeback_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_writeback_interval_show,
+		.write = mem_cgroup_dirty_writeback_interval_write,
+	},
+	{
+		.name = "dirty_expire_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_expire_interval_show,
+		.write = mem_cgroup_dirty_expire_interval_write,
+	},
 #endif
 	{ },	/* terminate */
 };
@@ -6549,6 +6651,18 @@ static struct cftype memory_files[] = {
 		.seq_show = mem_cgroup_dirty_ratio_show,
 		.write = mem_cgroup_dirty_ratio_write,
 	},
+	{
+		.name = "dirty_writeback_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_writeback_interval_show,
+		.write = mem_cgroup_dirty_writeback_interval_write,
+	},
+	{
+		.name = "dirty_expire_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_expire_interval_show,
+		.write = mem_cgroup_dirty_expire_interval_write,
+	},
 #endif
 	{ }	/* terminate */
 };
-- 
2.27.0


  parent reply	other threads:[~2022-04-27  9:37 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20220427093241.108281-1-yongmeixie@hotmail.com>
2022-04-27  9:32 ` [PATCH 1/3] writeback: refine trace event balance_dirty_pages Xie Yongmei
2022-04-27  9:32 ` [PATCH 2/3] writeback: per memcg dirty flush Xie Yongmei
2022-04-27 10:35   ` Michal Hocko
2022-04-27  9:32 ` Xie Yongmei [this message]
2022-04-27 10:36   ` [PATCH 3/3] writeback: specify writeback period and expire interval per memcg Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=TYYP286MB1115331A1F4852D7CA3E86A2C5FA9@TYYP286MB1115.JPNP286.PROD.OUTLOOK.COM \
    --to=yongmeixie@hotmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=yongmeixie@hotmail.comc \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.