Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org, jack@suse.cz, hch@infradead.org,
	hannes@cmpxchg.org, linux-fsdevel@vger.kernel.org,
	vgoyal@redhat.com, lizefan@huawei.com, cgroups@vger.kernel.org,
	linux-mm@kvack.org, mhocko@suse.cz, clm@fb.com,
	fengguang.wu@intel.com, david@fromorbit.com, gthelen@google.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 05/19] writeback: move global_dirty_limit into wb_domain
Date: Mon,  6 Apr 2015 16:04:20 -0400
Message-ID: <1428350674-8303-6-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1428350674-8303-1-git-send-email-tj@kernel.org>

This patch is a part of the series to define wb_domain which
represents a domain that wb's (bdi_writeback's) belong to and are
measured against each other in.  This will enable IO backpressure
propagation for cgroup writeback.

global_dirty_limit exists to regulate the global dirty threshold which
is a property of the wb_domain.  This patch moves hard_dirty_limit,
dirty_lock, and update_time into wb_domain.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
---
 fs/fs-writeback.c                |  2 +-
 include/linux/writeback.h        | 17 ++++++++++++++-
 include/trace/events/writeback.h |  7 +++---
 mm/page-writeback.c              | 46 ++++++++++++++++++++--------------------
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 85daae2..5b842bd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -868,7 +868,7 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
 		pages = LONG_MAX;
 	else {
 		pages = min(wb->avg_write_bandwidth / 2,
-			    global_dirty_limit / DIRTY_SCOPE);
+			    global_wb_domain.dirty_limit / DIRTY_SCOPE);
 		pages = min(pages, work->nr_pages);
 		pages = round_down(pages + MIN_WRITEBACK_PAGES,
 				   MIN_WRITEBACK_PAGES);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 4972dcf..fe0924a 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,8 @@ struct writeback_control {
  * dirtyable memory accordingly.
  */
 struct wb_domain {
+	spinlock_t lock;
+
 	/*
 	 * Scale the writeback cache size proportional to the relative
 	 * writeout speed.
@@ -115,6 +117,19 @@ struct wb_domain {
 	struct fprop_global completions;
 	struct timer_list period_timer;	/* timer for aging of completions */
 	unsigned long period_time;
+
+	/*
+	 * The dirtyable memory and dirty threshold could be suddenly
+	 * knocked down by a large amount (eg. on the startup of KVM in a
+	 * swapless system). This may throw the system into deep dirty
+	 * exceeded state and throttle heavy/light dirtiers alike. To
+	 * retain good responsiveness, maintain global_dirty_limit for
+	 * tracking slowly down to the knocked down dirty threshold.
+	 *
+	 * Both fields are protected by ->lock.
+	 */
+	unsigned long dirty_limit_tstamp;
+	unsigned long dirty_limit;
 };
 
 /*
@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
-extern unsigned long global_dirty_limit;
+extern struct wb_domain global_wb_domain;
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 5c9a68c..d5ac3dd 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -344,7 +344,7 @@ TRACE_EVENT(global_dirty_state,
 		__entry->nr_written	= global_page_state(NR_WRITTEN);
 		__entry->background_thresh = background_thresh;
 		__entry->dirty_thresh	= dirty_thresh;
-		__entry->dirty_limit = global_dirty_limit;
+		__entry->dirty_limit	= global_wb_domain.dirty_limit;
 	),
 
 	TP_printk("dirty=%lu writeback=%lu unstable=%lu "
@@ -446,8 +446,9 @@ TRACE_EVENT(balance_dirty_pages,
 		unsigned long freerun = (thresh + bg_thresh) / 2;
 		strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
 
-		__entry->limit		= global_dirty_limit;
-		__entry->setpoint	= (global_dirty_limit + freerun) / 2;
+		__entry->limit		= global_wb_domain.dirty_limit;
+		__entry->setpoint	= (global_wb_domain.dirty_limit +
+						freerun) / 2;
 		__entry->dirty		= dirty;
 		__entry->bdi_setpoint	= __entry->setpoint *
 						bdi_thresh / (thresh + 1);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 43380dc..c141533 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -122,9 +122,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
-unsigned long global_dirty_limit;
-
-static struct wb_domain global_wb_domain;
+struct wb_domain global_wb_domain;
 
 /*
  * Length of period for aging writeout fractions of bdis. This is an
@@ -470,9 +468,15 @@ static void writeout_period(unsigned long t)
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
 {
 	memset(dom, 0, sizeof(*dom));
+
+	spin_lock_init(&dom->lock);
+
 	init_timer_deferrable(&dom->period_timer);
 	dom->period_timer.function = writeout_period;
 	dom->period_timer.data = (unsigned long)dom;
+
+	dom->dirty_limit_tstamp = jiffies;
+
 	return fprop_global_init(&dom->completions, gfp);
 }
 
@@ -532,7 +536,9 @@ static unsigned long dirty_freerun_ceiling(unsigned long thresh,
 
 static unsigned long hard_dirty_limit(unsigned long thresh)
 {
-	return max(thresh, global_dirty_limit);
+	struct wb_domain *dom = &global_wb_domain;
+
+	return max(thresh, dom->dirty_limit);
 }
 
 /**
@@ -916,17 +922,10 @@ out:
 	wb->avg_write_bandwidth = avg;
 }
 
-/*
- * The global dirtyable memory and dirty threshold could be suddenly knocked
- * down by a large amount (eg. on the startup of KVM in a swapless system).
- * This may throw the system into deep dirty exceeded state and throttle
- * heavy/light dirtiers alike. To retain good responsiveness, maintain
- * global_dirty_limit for tracking slowly down to the knocked down dirty
- * threshold.
- */
 static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
 {
-	unsigned long limit = global_dirty_limit;
+	struct wb_domain *dom = &global_wb_domain;
+	unsigned long limit = dom->dirty_limit;
 
 	/*
 	 * Follow up in one step.
@@ -939,7 +938,7 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
 	/*
 	 * Follow down slowly. Use the higher one as the target, because thresh
 	 * may drop below dirty. This is exactly the reason to introduce
-	 * global_dirty_limit which is guaranteed to lie above the dirty pages.
+	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
 	 */
 	thresh = max(thresh, dirty);
 	if (limit > thresh) {
@@ -948,28 +947,27 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
 	}
 	return;
 update:
-	global_dirty_limit = limit;
+	dom->dirty_limit = limit;
 }
 
 static void global_update_bandwidth(unsigned long thresh,
 				    unsigned long dirty,
 				    unsigned long now)
 {
-	static DEFINE_SPINLOCK(dirty_lock);
-	static unsigned long update_time = INITIAL_JIFFIES;
+	struct wb_domain *dom = &global_wb_domain;
 
 	/*
 	 * check locklessly first to optimize away locking for the most time
 	 */
-	if (time_before(now, update_time + BANDWIDTH_INTERVAL))
+	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
 		return;
 
-	spin_lock(&dirty_lock);
-	if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) {
+	spin_lock(&dom->lock);
+	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
 		update_dirty_limit(thresh, dirty);
-		update_time = now;
+		dom->dirty_limit_tstamp = now;
 	}
-	spin_unlock(&dirty_lock);
+	spin_unlock(&dom->lock);
 }
 
 /*
@@ -1761,10 +1759,12 @@ void laptop_sync_completion(void)
 
 void writeback_set_ratelimit(void)
 {
+	struct wb_domain *dom = &global_wb_domain;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
+
 	global_dirty_limits(&background_thresh, &dirty_thresh);
-	global_dirty_limit = dirty_thresh;
+	dom->dirty_limit = dirty_thresh;
 	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
-- 
2.1.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply index

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-06 20:04 [PATCHSET 2/3 v2 block/for-4.1/core] writeback: cgroup writeback backpressure propagation Tejun Heo
2015-04-06 20:04 ` [PATCH 01/19] memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online Tejun Heo
2015-04-06 20:04 ` [PATCH 02/19] writeback: clean up wb_dirty_limit() Tejun Heo
2015-04-06 20:04 ` [PATCH 03/19] writeback: reorganize [__]wb_update_bandwidth() Tejun Heo
2015-04-06 20:04 ` [PATCH 04/19] writeback: implement wb_domain Tejun Heo
2015-04-06 20:04 ` Tejun Heo [this message]
2015-04-06 20:04 ` [PATCH 06/19] writeback: consolidate dirty throttle parameters into dirty_throttle_control Tejun Heo
2015-04-06 20:04 ` [PATCH 07/19] writeback: add dirty_throttle_control->wb_bg_thresh Tejun Heo
2015-04-06 20:04 ` [PATCH 08/19] writeback: make __wb_calc_thresh() take dirty_throttle_control Tejun Heo
2015-04-06 20:04 ` [PATCH 09/19] writeback: add dirty_throttle_control->pos_ratio Tejun Heo
2015-04-06 20:04 ` [PATCH 10/19] writeback: add dirty_throttle_control->wb_completions Tejun Heo
2015-04-06 20:04 ` [PATCH 11/19] writeback: add dirty_throttle_control->dom Tejun Heo
2015-04-06 20:04 ` [PATCH 12/19] writeback: make __wb_writeout_inc() and hard_dirty_limit() take wb_domaas a parameter Tejun Heo
2015-04-06 20:04 ` [PATCH 13/19] writeback: separate out domain_dirty_limits() Tejun Heo
2015-04-06 20:04 ` [PATCH 14/19] writeback: move over_bground_thresh() to mm/page-writeback.c Tejun Heo
2015-04-06 20:04 ` [PATCH 15/19] writeback: update wb_over_bg_thresh() to use wb_domain aware operations Tejun Heo
2015-04-06 20:04 ` [PATCH 16/19] writeback: implement memcg wb_domain Tejun Heo
2015-04-06 20:04 ` [PATCH 17/19] writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes Tejun Heo
2015-04-06 20:04 ` [PATCH 18/19] writeback: implement memcg writeback domain based throttling Tejun Heo
2015-04-06 20:04 ` [PATCH 19/19] mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use Tejun Heo
2015-04-06 20:07 ` [PATCHSET 2/3 v2 block/for-4.1/core] writeback: cgroup writeback backpressure propagation Tejun Heo
2015-05-22 22:23 [PATCHSET 2/3 v3 block/for-4.2/core] " Tejun Heo
2015-05-22 22:23 ` [PATCH 05/19] writeback: move global_dirty_limit into wb_domain Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1428350674-8303-6-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=cgroups@vger.kernel.org \
    --cc=clm@fb.com \
    --cc=david@fromorbit.com \
    --cc=fengguang.wu@intel.com \
    --cc=gthelen@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=mhocko@suse.cz \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git