linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	jack-AlSwsSmVLrQ@public.gmane.org,
	hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org,
	hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
	mhocko-AlSwsSmVLrQ@public.gmane.org,
	clm-b10kYP2dOMg@public.gmane.org,
	fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org,
	david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org,
	gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Subject: [PATCH 15/18] writeback: implement memcg wb_domain
Date: Mon, 23 Mar 2015 01:07:44 -0400	[thread overview]
Message-ID: <1427087267-16592-16-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1427087267-16592-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets the proportinately sized portion of the memory in the
whole system.

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.

The previous patches laid the groundwork to support the two wb_domains
and this patch implements memcg wb_domain.  memcg->cgwb_domain is
initialized on css online and destroyed on css release,
wb->memcg_completions is added, and __wb_writeout_inc() is updated to
increment completions against both global and memcg wb_domains.

The following patches will update balance_dirty_pages() and its
subroutines to actually consider memcg wb_domain for throttling.

Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Jens Axboe <axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
Cc: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Cc: Wu Fengguang <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Greg Thelen <gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 include/linux/backing-dev-defs.h |  1 +
 include/linux/memcontrol.h       | 12 +++++++++++-
 include/linux/writeback.h        |  3 +++
 mm/backing-dev.c                 |  9 ++++++++-
 mm/memcontrol.c                  | 39 +++++++++++++++++++++++++++++++++++++++
 mm/page-writeback.c              | 25 +++++++++++++++++++++++++
 6 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 97a92fa..8d470b7 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -118,6 +118,7 @@ struct bdi_writeback {
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct percpu_ref refcnt;	/* used only for !root wb's */
+	struct fprop_local_percpu memcg_completions;
 	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 662a953..e3177be 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -389,8 +389,18 @@ enum {
 };
 
 #ifdef CONFIG_CGROUP_WRITEBACK
+
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
-#endif
+struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+{
+	return NULL;
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fa6c3b4..e421625 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -167,6 +167,9 @@ static inline void laptop_sync_completion(void) { }
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
+#ifdef CONFIG_CGROUP_WRITEBACK
+void wb_domain_exit(struct wb_domain *dom);
+#endif
 
 extern struct wb_domain global_wb_domain;
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 331e4d7..8828edf 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -483,6 +483,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	css_put(wb->blkcg_css);
 	wb_congested_put(wb->congested);
 
+	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
 	kfree_rcu(wb, rcu);
@@ -549,9 +550,13 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	if (ret)
 		goto err_wb_exit;
 
+	ret = fprop_local_init_percpu(&wb->memcg_completions, gfp);
+	if (ret)
+		goto err_ref_exit;
+
 	wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
 	if (!wb->congested)
-		goto err_ref_exit;
+		goto err_fprop_exit;
 
 	wb->memcg_css = memcg_css;
 	wb->blkcg_css = blkcg_css;
@@ -588,6 +593,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
 err_put_congested:
 	wb_congested_put(wb->congested);
+err_fprop_exit:
+	fprop_local_destroy_percpu(&wb->memcg_completions);
 err_ref_exit:
 	percpu_ref_exit(&wb->refcnt);
 err_wb_exit:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab483e9..2a74cf3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -344,6 +344,7 @@ struct mem_cgroup {
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head cgwb_list;
+	struct wb_domain cgwb_domain;
 #endif
 
 	/* List of events which userspace want to receive */
@@ -4085,6 +4086,37 @@ struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg)
 	return &memcg->cgwb_list;
 }
 
+static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
+{
+	return wb_domain_init(&memcg->cgwb_domain, gfp);
+}
+
+static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+{
+	wb_domain_exit(&memcg->cgwb_domain);
+}
+
+struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+
+	if (!memcg->css.parent)
+		return NULL;
+
+	return &memcg->cgwb_domain;
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
+{
+	return 0;
+}
+
+static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
@@ -4471,9 +4503,15 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
 	if (!memcg->stat)
 		goto out_free;
+
+	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
+		goto out_free_stat;
+
 	spin_lock_init(&memcg->pcp_counter_lock);
 	return memcg;
 
+out_free_stat:
+	free_percpu(memcg->stat);
 out_free:
 	kfree(memcg);
 	return NULL;
@@ -4500,6 +4538,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 		free_mem_cgroup_per_zone_info(memcg, node);
 
 	free_percpu(memcg->stat);
+	memcg_wb_domain_exit(memcg);
 	kfree(memcg);
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2626e6c..9ccd37e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -171,6 +171,11 @@ static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *m
 	return mdtc->gdtc;
 }
 
+static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
+{
+	return &wb->memcg_completions;
+}
+
 static void wb_min_max_ratio(struct bdi_writeback *wb,
 			     unsigned long *minp, unsigned long *maxp)
 {
@@ -213,6 +218,11 @@ static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *m
 	return NULL;
 }
 
+static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
+{
+	return NULL;
+}
+
 static void wb_min_max_ratio(struct bdi_writeback *wb,
 			     unsigned long *minp, unsigned long *maxp)
 {
@@ -530,9 +540,16 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,
  */
 static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
+	struct wb_domain *cgdom;
+
 	__inc_wb_stat(wb, WB_WRITTEN);
 	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
 			       wb->bdi->max_prop_frac);
+
+	cgdom = mem_cgroup_wb_domain(wb);
+	if (cgdom)
+		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
+				       wb->bdi->max_prop_frac);
 }
 
 void wb_writeout_inc(struct bdi_writeback *wb)
@@ -583,6 +600,14 @@ int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
 	return fprop_global_init(&dom->completions, gfp);
 }
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+void wb_domain_exit(struct wb_domain *dom)
+{
+	del_timer_sync(&dom->period_timer);
+	fprop_global_destroy(&dom->completions);
+}
+#endif
+
 /*
  * bdi_min_ratio keeps the sum of the minimum dirty shares of all
  * registered backing devices, which, for obvious reasons, can not
-- 
2.1.0

  parent reply	other threads:[~2015-03-23  5:07 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-23  5:07 [PATCHSET 2/3 block/for-4.1/core] writeback: cgroup writeback backpressure propagation Tejun Heo
2015-03-23  5:07 ` [PATCH 01/18] memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online Tejun Heo
2015-03-25 22:39   ` [PATCH 1.5/18] writeback: clean up wb_dirty_limit() Tejun Heo
2015-03-23  5:07 ` [PATCH 02/18] writeback: reorganize [__]wb_update_bandwidth() Tejun Heo
2015-03-23  5:07 ` [PATCH 03/18] writeback: implement wb_domain Tejun Heo
2015-03-23  5:07 ` [PATCH 04/18] writeback: move global_dirty_limit into wb_domain Tejun Heo
2015-03-23  5:07 ` [PATCH 05/18] writeback: consolidate dirty throttle parameters into dirty_throttle_control Tejun Heo
2015-03-23  5:07 ` [PATCH 06/18] writeback: add dirty_throttle_control->wb_bg_thresh Tejun Heo
2015-03-23  5:07 ` [PATCH 07/18] writeback: make __wb_dirty_limit() take dirty_throttle_control Tejun Heo
     [not found]   ` <1427087267-16592-8-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2015-03-25 22:42     ` [PATCH v2 07/18] writeback: make __wb_calc_thresh() " Tejun Heo
2015-03-23  5:07 ` [PATCH 08/18] writeback: add dirty_throttle_control->pos_ratio Tejun Heo
2015-03-23  5:07 ` [PATCH 09/18] writeback: add dirty_throttle_control->wb_completions Tejun Heo
2015-03-23  5:07 ` [PATCH 10/18] writeback: add dirty_throttle_control->dom Tejun Heo
2015-03-23  5:07 ` [PATCH 11/18] writeback: make __wb_writeout_inc() and hard_dirty_limit() take wb_domaas a parameter Tejun Heo
2015-03-23  5:07 ` [PATCH 12/18] writeback: separate out domain_dirty_limits() Tejun Heo
2015-03-23  5:07 ` [PATCH 13/18] writeback: move over_bground_thresh() to mm/page-writeback.c Tejun Heo
     [not found] ` <1427087267-16592-1-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2015-03-23  5:07   ` [PATCH 14/18] writeback: update wb_over_bg_thresh() to use wb_domain aware operations Tejun Heo
2015-03-23  5:07   ` Tejun Heo [this message]
2015-03-23  5:07   ` [PATCH 16/18] writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes Tejun Heo
2015-03-23  5:07 ` [PATCH 17/18] writeback: implement memcg writeback domain based throttling Tejun Heo
2015-03-23  5:07 ` [PATCH 18/18] mm: vmscan: remove memcg stalling on writeback pages during direct reclaim Tejun Heo
2015-03-23  5:27   ` Tejun Heo
2015-03-25 22:26   ` [PATCH v2 18/18] mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1427087267-16592-16-git-send-email-tj@kernel.org \
    --to=tj-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
    --cc=axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=clm-b10kYP2dOMg@public.gmane.org \
    --cc=david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org \
    --cc=fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org \
    --cc=hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org \
    --cc=jack-AlSwsSmVLrQ@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
    --cc=lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org \
    --cc=mhocko-AlSwsSmVLrQ@public.gmane.org \
    --cc=vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).