From mboxrd@z Thu Jan 1 00:00:00 1970 From: Greg Thelen Subject: [PATCH v9 03/13] memcg: add dirty page accounting infrastructure Date: Wed, 17 Aug 2011 09:14:55 -0700 Message-ID: <1313597705-6093-4-git-send-email-gthelen@google.com> References: <1313597705-6093-1-git-send-email-gthelen@google.com> Return-path: In-Reply-To: <1313597705-6093-1-git-send-email-gthelen@google.com> Sender: owner-linux-mm@kvack.org To: Andrew Morton Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, containers@lists.osdl.org, linux-fsdevel@vger.kernel.org, KAMEZAWA Hiroyuki , Balbir Singh , Daisuke Nishimura , Minchan Kim , Johannes Weiner , Wu Fengguang , Dave Chinner , Vivek Goyal , Andrea Righi , Ciju Rajan K , David Rientjes , Greg Thelen List-Id: containers.vger.kernel.org Add memcg routines to count dirty, writeback, and unstable_NFS pages. These routines are not yet used by the kernel to count such pages. A later change adds kernel calls to these new routines. As inode pages are marked dirty, if the dirtied page's cgroup differs from the inode's cgroup, then mark the inode shared across several cgroups. Signed-off-by: Greg Thelen Signed-off-by: Andrea Righi --- Changelog since v8: - In v8 this patch was applied after 'memcg: add mem_cgroup_mark_inode_dirty()'. In this version (v9), this patch comes first. The result is that this patch does not contain code to mark inode with I_MEMCG_SHARED. That logic is deferred until the later 'memcg: add mem_cgroup_mark_inode_dirty()' patch. include/linux/memcontrol.h | 8 ++++- mm/memcontrol.c | 87 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5633f51..e6af3a9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,15 @@ struct page_cgroup; struct page; struct mm_struct; -/* Stats that can be updated by kernel. */ +/* + * Per mem_cgroup page counts tracked by kernel. 
As pages enter and leave these + * states, the kernel notifies memcg using mem_cgroup_{inc,dec}_page_stat(). + */ enum mem_cgroup_page_stat_item { MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ + MEMCG_NR_FILE_DIRTY, /* # of dirty pages in page cache */ + MEMCG_NR_FILE_WRITEBACK, /* # of pages under writeback */ + MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */ }; extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c6faa32..723b8bf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -84,8 +84,11 @@ enum mem_cgroup_stat_index { */ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_FILE_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */ MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ MEM_CGROUP_STAT_NSTATS, @@ -2066,6 +2069,44 @@ void mem_cgroup_update_page_stat(struct page *page, ClearPageCgroupFileMapped(pc); idx = MEM_CGROUP_STAT_FILE_MAPPED; break; + + case MEMCG_NR_FILE_DIRTY: + /* Use Test{Set,Clear} to only un/charge the memcg once. */ + if (val > 0) { + if (TestSetPageCgroupFileDirty(pc)) + val = 0; + } else { + if (!TestClearPageCgroupFileDirty(pc)) + val = 0; + } + idx = MEM_CGROUP_STAT_FILE_DIRTY; + break; + + case MEMCG_NR_FILE_WRITEBACK: + /* + * This counter is adjusted while holding the mapping's + * tree_lock. Therefore there is no race between settings and + * clearing of this flag. 
+ */ + if (val > 0) + SetPageCgroupFileWriteback(pc); + else + ClearPageCgroupFileWriteback(pc); + idx = MEM_CGROUP_STAT_FILE_WRITEBACK; + break; + + case MEMCG_NR_FILE_UNSTABLE_NFS: + /* Use Test{Set,Clear} to only un/charge the memcg once. */ + if (val > 0) { + if (TestSetPageCgroupFileUnstableNFS(pc)) + val = 0; + } else { + if (!TestClearPageCgroupFileUnstableNFS(pc)) + val = 0; + } + idx = MEM_CGROUP_STAT_FILE_UNSTABLE_NFS; + break; + default: BUG(); } @@ -2663,6 +2704,17 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) } #endif +static inline +void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, + struct mem_cgroup *to, + enum mem_cgroup_stat_index idx) +{ + preempt_disable(); + __this_cpu_dec(from->stat->count[idx]); + __this_cpu_inc(to->stat->count[idx]); + preempt_enable(); +} + /** * mem_cgroup_move_account - move account of the page * @page: the page @@ -2711,13 +2763,18 @@ static int mem_cgroup_move_account(struct page *page, move_lock_page_cgroup(pc, &flags); - if (PageCgroupFileMapped(pc)) { - /* Update mapped_file data for mem_cgroup */ - preempt_disable(); - __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - preempt_enable(); - } + if (PageCgroupFileMapped(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_MAPPED); + if (PageCgroupFileDirty(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_DIRTY); + if (PageCgroupFileWriteback(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_WRITEBACK); + if (PageCgroupFileUnstableNFS(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_UNSTABLE_NFS); mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); if (uncharge) /* This is not "cancel", but cancel_charge does all we need. 
*/ @@ -4147,6 +4204,9 @@ enum { MCS_SWAP, MCS_PGFAULT, MCS_PGMAJFAULT, + MCS_FILE_DIRTY, + MCS_WRITEBACK, + MCS_UNSTABLE_NFS, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, @@ -4171,6 +4231,9 @@ struct { {"swap", "total_swap"}, {"pgfault", "total_pgfault"}, {"pgmajfault", "total_pgmajfault"}, + {"dirty", "total_dirty"}, + {"writeback", "total_writeback"}, + {"nfs_unstable", "total_nfs_unstable"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, @@ -4204,6 +4267,14 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); s->stat[MCS_PGMAJFAULT] += val; + /* dirty stat */ + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_DIRTY); + s->stat[MCS_FILE_DIRTY] += val * PAGE_SIZE; + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_WRITEBACK); + s->stat[MCS_WRITEBACK] += val * PAGE_SIZE; + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_UNSTABLE_NFS); + s->stat[MCS_UNSTABLE_NFS] += val * PAGE_SIZE; + /* per zone stat */ val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; -- 1.7.3.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754217Ab1HQQSZ (ORCPT ); Wed, 17 Aug 2011 12:18:25 -0400 Received: from smtp-out.google.com ([216.239.44.51]:41245 "EHLO smtp-out.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753946Ab1HQQSV (ORCPT ); Wed, 17 Aug 2011 12:18:21 -0400 DomainKey-Signature: a=rsa-sha1; s=beta; d=google.com; c=nofws; q=dns; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; b=RldL7BHkWN6QUr5hs61ZNRLfhcHDeEgnz5ApuXq3lFefV7+6ULex8XmAESZk3BQe/ JF2GZ3sH0OaO5kOGfkuNg== From: Greg Thelen To: Andrew Morton Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, containers@lists.osdl.org, linux-fsdevel@vger.kernel.org, KAMEZAWA Hiroyuki , Balbir Singh , Daisuke Nishimura , Minchan Kim , Johannes Weiner , Wu Fengguang , Dave Chinner , Vivek Goyal , Andrea Righi , Ciju Rajan K , David Rientjes , Greg Thelen Subject: [PATCH v9 03/13] memcg: add dirty page accounting infrastructure Date: Wed, 17 Aug 2011 09:14:55 -0700 Message-Id: <1313597705-6093-4-git-send-email-gthelen@google.com> X-Mailer: git-send-email 1.7.3.1 In-Reply-To: <1313597705-6093-1-git-send-email-gthelen@google.com> References: <1313597705-6093-1-git-send-email-gthelen@google.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add memcg routines to count dirty, writeback, and unstable_NFS pages. These routines are not yet used by the kernel to count such pages. A later change adds kernel calls to these new routines. As inode pages are marked dirty, if the dirtied page's cgroup differs from the inode's cgroup, then mark the inode shared across several cgroups. 
Signed-off-by: Greg Thelen Signed-off-by: Andrea Righi --- Changelog since v8: - In v8 this patch was applied after 'memcg: add mem_cgroup_mark_inode_dirty()'. In this version (v9), this patch comes first. The result is that this patch does not contain code to mark inode with I_MEMCG_SHARED. That logic is deferred until the later 'memcg: add mem_cgroup_mark_inode_dirty()' patch. include/linux/memcontrol.h | 8 ++++- mm/memcontrol.c | 87 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5633f51..e6af3a9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,15 @@ struct page_cgroup; struct page; struct mm_struct; -/* Stats that can be updated by kernel. */ +/* + * Per mem_cgroup page counts tracked by kernel. As pages enter and leave these + * states, the kernel notifies memcg using mem_cgroup_{inc,dec}_page_stat(). + */ enum mem_cgroup_page_stat_item { MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ + MEMCG_NR_FILE_DIRTY, /* # of dirty pages in page cache */ + MEMCG_NR_FILE_WRITEBACK, /* # of pages under writeback */ + MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */ }; extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c6faa32..723b8bf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -84,8 +84,11 @@ enum mem_cgroup_stat_index { */ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_FILE_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_FILE_UNSTABLE_NFS, /* # of NFS unstable 
pages */ MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ MEM_CGROUP_STAT_NSTATS, @@ -2066,6 +2069,44 @@ void mem_cgroup_update_page_stat(struct page *page, ClearPageCgroupFileMapped(pc); idx = MEM_CGROUP_STAT_FILE_MAPPED; break; + + case MEMCG_NR_FILE_DIRTY: + /* Use Test{Set,Clear} to only un/charge the memcg once. */ + if (val > 0) { + if (TestSetPageCgroupFileDirty(pc)) + val = 0; + } else { + if (!TestClearPageCgroupFileDirty(pc)) + val = 0; + } + idx = MEM_CGROUP_STAT_FILE_DIRTY; + break; + + case MEMCG_NR_FILE_WRITEBACK: + /* + * This counter is adjusted while holding the mapping's + * tree_lock. Therefore there is no race between settings and + * clearing of this flag. + */ + if (val > 0) + SetPageCgroupFileWriteback(pc); + else + ClearPageCgroupFileWriteback(pc); + idx = MEM_CGROUP_STAT_FILE_WRITEBACK; + break; + + case MEMCG_NR_FILE_UNSTABLE_NFS: + /* Use Test{Set,Clear} to only un/charge the memcg once. 
*/ + if (val > 0) { + if (TestSetPageCgroupFileUnstableNFS(pc)) + val = 0; + } else { + if (!TestClearPageCgroupFileUnstableNFS(pc)) + val = 0; + } + idx = MEM_CGROUP_STAT_FILE_UNSTABLE_NFS; + break; + default: BUG(); } @@ -2663,6 +2704,17 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) } #endif +static inline +void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, + struct mem_cgroup *to, + enum mem_cgroup_stat_index idx) +{ + preempt_disable(); + __this_cpu_dec(from->stat->count[idx]); + __this_cpu_inc(to->stat->count[idx]); + preempt_enable(); +} + /** * mem_cgroup_move_account - move account of the page * @page: the page @@ -2711,13 +2763,18 @@ static int mem_cgroup_move_account(struct page *page, move_lock_page_cgroup(pc, &flags); - if (PageCgroupFileMapped(pc)) { - /* Update mapped_file data for mem_cgroup */ - preempt_disable(); - __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - preempt_enable(); - } + if (PageCgroupFileMapped(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_MAPPED); + if (PageCgroupFileDirty(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_DIRTY); + if (PageCgroupFileWriteback(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_WRITEBACK); + if (PageCgroupFileUnstableNFS(pc)) + mem_cgroup_move_account_page_stat(from, to, + MEM_CGROUP_STAT_FILE_UNSTABLE_NFS); mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); if (uncharge) /* This is not "cancel", but cancel_charge does all we need. 
*/ @@ -4147,6 +4204,9 @@ enum { MCS_SWAP, MCS_PGFAULT, MCS_PGMAJFAULT, + MCS_FILE_DIRTY, + MCS_WRITEBACK, + MCS_UNSTABLE_NFS, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, @@ -4171,6 +4231,9 @@ struct { {"swap", "total_swap"}, {"pgfault", "total_pgfault"}, {"pgmajfault", "total_pgmajfault"}, + {"dirty", "total_dirty"}, + {"writeback", "total_writeback"}, + {"nfs_unstable", "total_nfs_unstable"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, @@ -4204,6 +4267,14 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); s->stat[MCS_PGMAJFAULT] += val; + /* dirty stat */ + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_DIRTY); + s->stat[MCS_FILE_DIRTY] += val * PAGE_SIZE; + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_WRITEBACK); + s->stat[MCS_WRITEBACK] += val * PAGE_SIZE; + val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_UNSTABLE_NFS); + s->stat[MCS_UNSTABLE_NFS] += val * PAGE_SIZE; + /* per zone stat */ val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; -- 1.7.3.1