From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: "Theodore Ts'o" <tytso@mit.edu>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Jan Kara <jack@suse.com>,
Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
Hugh Dickins <hughd@google.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Dave Hansen <dave.hansen@intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Matthew Wilcox <willy@infradead.org>,
Ross Zwisler <ross.zwisler@linux.intel.com>,
linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-block@vger.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv6 23/37] mm: account huge pages to dirty, writeback, reclaimable, etc.
Date: Thu, 26 Jan 2017 14:58:05 +0300 [thread overview]
Message-ID: <20170126115819.58875-24-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20170126115819.58875-1-kirill.shutemov@linux.intel.com>
We need to account huge pages according to their size to make background
writeback work properly.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
fs/fs-writeback.c | 10 +++---
include/linux/backing-dev.h | 10 ++++++
include/linux/memcontrol.h | 22 ++-----------
mm/migrate.c | 1 +
mm/page-writeback.c | 80 +++++++++++++++++++++++++++++----------------
mm/rmap.c | 4 +--
6 files changed, 74 insertions(+), 53 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef600591d96f..e1c9faddc9e1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -366,8 +366,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
if (likely(page) && PageDirty(page)) {
- __dec_wb_stat(old_wb, WB_RECLAIMABLE);
- __inc_wb_stat(new_wb, WB_RECLAIMABLE);
+ int nr = hpage_nr_pages(page);
+ __add_wb_stat(old_wb, WB_RECLAIMABLE, -nr);
+ __add_wb_stat(new_wb, WB_RECLAIMABLE, nr);
}
}
@@ -376,9 +377,10 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
if (likely(page)) {
+ int nr = hpage_nr_pages(page);
WARN_ON_ONCE(!PageWriteback(page));
- __dec_wb_stat(old_wb, WB_WRITEBACK);
- __inc_wb_stat(new_wb, WB_WRITEBACK);
+ __add_wb_stat(old_wb, WB_WRITEBACK, -nr);
+ __add_wb_stat(new_wb, WB_WRITEBACK, nr);
}
}
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 43b93a947e61..e63487f78824 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -61,6 +61,16 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
}
+static inline void add_wb_stat(struct bdi_writeback *wb,
+ enum wb_stat_item item, s64 amount)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __add_wb_stat(wb, item, amount);
+ local_irq_restore(flags);
+}
+
static inline void __inc_wb_stat(struct bdi_writeback *wb,
enum wb_stat_item item)
{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..7a341b01937f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,6 +29,7 @@
#include <linux/mmzone.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>
+#include <linux/mm.h>
struct mem_cgroup;
struct page;
@@ -517,18 +518,6 @@ static inline void mem_cgroup_update_page_stat(struct page *page,
this_cpu_add(page->mem_cgroup->stat->count[idx], val);
}
-static inline void mem_cgroup_inc_page_stat(struct page *page,
- enum mem_cgroup_stat_index idx)
-{
- mem_cgroup_update_page_stat(page, idx, 1);
-}
-
-static inline void mem_cgroup_dec_page_stat(struct page *page,
- enum mem_cgroup_stat_index idx)
-{
- mem_cgroup_update_page_stat(page, idx, -1);
-}
-
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
@@ -739,13 +728,8 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}
-static inline void mem_cgroup_inc_page_stat(struct page *page,
- enum mem_cgroup_stat_index idx)
-{
-}
-
-static inline void mem_cgroup_dec_page_stat(struct page *page,
- enum mem_cgroup_stat_index idx)
+static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx, int val)
{
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 366466ed7fdc..20a9ce2fcc64 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -485,6 +485,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
* are mapped to swap space.
*/
if (newzone != oldzone) {
+ BUG_ON(PageTransHuge(page));
__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
if (PageSwapBacked(page) && !PageSwapCache(page)) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 47d5b12c460e..d7b905d66add 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2430,19 +2430,22 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
if (mapping_cap_account_dirty(mapping)) {
struct bdi_writeback *wb;
+ struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
inode_attach_wb(inode, page);
wb = inode_to_wb(inode);
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
- __inc_node_page_state(page, NR_FILE_DIRTY);
- __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
- __inc_node_page_state(page, NR_DIRTIED);
- __inc_wb_stat(wb, WB_RECLAIMABLE);
- __inc_wb_stat(wb, WB_DIRTIED);
- task_io_account_write(PAGE_SIZE);
- current->nr_dirtied++;
- this_cpu_inc(bdp_ratelimits);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_DIRTY, nr);
+ __mod_node_page_state(pgdat, NR_FILE_DIRTY, nr);
+ __mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, nr);
+ __mod_node_page_state(pgdat, NR_DIRTIED, nr);
+ __add_wb_stat(wb, WB_RECLAIMABLE, nr);
+ __add_wb_stat(wb, WB_DIRTIED, nr);
+ task_io_account_write(nr * PAGE_SIZE);
+ current->nr_dirtied += nr;
+ this_cpu_add(bdp_ratelimits, nr);
}
}
EXPORT_SYMBOL(account_page_dirtied);
@@ -2456,11 +2459,15 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb)
{
if (mapping_cap_account_dirty(mapping)) {
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
- dec_node_page_state(page, NR_FILE_DIRTY);
- dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
- dec_wb_stat(wb, WB_RECLAIMABLE);
- task_io_account_cancelled_write(PAGE_SIZE);
+ struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
+
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_DIRTY, -nr);
+ mod_node_page_state(pgdat, NR_FILE_DIRTY, -nr);
+ mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+ add_wb_stat(wb, WB_RECLAIMABLE, -nr);
+ task_io_account_cancelled_write(PAGE_SIZE * nr);
}
}
@@ -2520,14 +2527,16 @@ void account_page_redirty(struct page *page)
struct address_space *mapping = page->mapping;
if (mapping && mapping_cap_account_dirty(mapping)) {
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
bool locked;
wb = unlocked_inode_to_wb_begin(inode, &locked);
- current->nr_dirtied--;
- dec_node_page_state(page, NR_DIRTIED);
- dec_wb_stat(wb, WB_DIRTIED);
+ current->nr_dirtied -= nr;
+ mod_node_page_state(pgdat, NR_DIRTIED, -nr);
+ add_wb_stat(wb, WB_DIRTIED, -nr);
unlocked_inode_to_wb_end(inode, locked);
}
}
@@ -2713,10 +2722,15 @@ int clear_page_dirty_for_io(struct page *page)
*/
wb = unlocked_inode_to_wb_begin(inode, &locked);
if (TestClearPageDirty(page)) {
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
- dec_node_page_state(page, NR_FILE_DIRTY);
- dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
- dec_wb_stat(wb, WB_RECLAIMABLE);
+ struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
+
+ mem_cgroup_update_page_stat(page,
+ MEM_CGROUP_STAT_DIRTY, -nr);
+ mod_node_page_state(pgdat, NR_FILE_DIRTY, -nr);
+ mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+ add_wb_stat(wb, WB_RECLAIMABLE, -nr);
ret = 1;
}
unlocked_inode_to_wb_end(inode, locked);
@@ -2760,10 +2774,15 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page);
}
if (ret) {
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
- dec_node_page_state(page, NR_WRITEBACK);
- dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
- inc_node_page_state(page, NR_WRITTEN);
+ struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
+
+ mem_cgroup_update_page_stat(page,
+ MEM_CGROUP_STAT_WRITEBACK, -nr);
+ mod_node_page_state(pgdat, NR_WRITEBACK, -nr);
+ mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+ mod_node_page_state(pgdat, NR_WRITTEN, nr);
}
unlock_page_memcg(page);
return ret;
@@ -2815,9 +2834,14 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
ret = TestSetPageWriteback(page);
}
if (!ret) {
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
- inc_node_page_state(page, NR_WRITEBACK);
- inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+ struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
+ int nr = hpage_nr_pages(page);
+
+ mem_cgroup_update_page_stat(page,
+ MEM_CGROUP_STAT_WRITEBACK, nr);
+ mod_node_page_state(pgdat, NR_WRITEBACK, nr);
+ mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, nr);
}
unlock_page_memcg(page);
return ret;
diff --git a/mm/rmap.c b/mm/rmap.c
index d9daa54dc316..38f1682f8dfc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1154,7 +1154,7 @@ void page_add_file_rmap(struct page *page, bool compound)
goto out;
}
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
out:
unlock_page_memcg(page);
}
@@ -1196,7 +1196,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
--
2.11.0
next prev parent reply other threads:[~2017-01-26 11:59 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-26 11:57 [PATCHv6 00/37] ext4: support of huge pages Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 01/37] mm, shmem: swich huge tmpfs to multi-order radix-tree entries Kirill A. Shutemov
2017-02-09 3:57 ` Matthew Wilcox
2017-02-09 16:58 ` Kirill A. Shutemov
2017-02-13 13:43 ` Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 02/37] Revert "radix-tree: implement radix_tree_maybe_preload_order()" Kirill A. Shutemov
2017-01-26 15:38 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 03/37] page-flags: relax page flag policy for few flags Kirill A. Shutemov
2017-02-09 4:01 ` Matthew Wilcox
2017-02-13 13:59 ` Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 04/37] mm, rmap: account file thp pages Kirill A. Shutemov
2017-02-09 20:17 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 05/37] thp: try to free page's buffers before attempt split Kirill A. Shutemov
2017-02-09 20:14 ` Matthew Wilcox
2017-02-13 14:32 ` Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 06/37] thp: handle write-protection faults for file THP Kirill A. Shutemov
2017-01-26 15:44 ` Matthew Wilcox
2017-01-26 15:57 ` Kirill A. Shutemov
2017-02-09 20:19 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 07/37] filemap: allocate huge page in page_cache_read(), if allowed Kirill A. Shutemov
2017-02-09 21:18 ` Matthew Wilcox
2017-02-13 15:17 ` Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 08/37] filemap: handle huge pages in do_generic_file_read() Kirill A. Shutemov
2017-02-09 21:55 ` Matthew Wilcox
2017-02-13 15:33 ` Kirill A. Shutemov
2017-02-13 16:01 ` Matthew Wilcox
2017-02-13 16:09 ` Matthew Wilcox
2017-02-13 16:28 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 09/37] filemap: allocate huge page in pagecache_get_page(), if allowed Kirill A. Shutemov
2017-02-09 21:59 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 10/37] filemap: handle huge pages in filemap_fdatawait_range() Kirill A. Shutemov
2017-02-09 23:03 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 11/37] HACK: readahead: alloc huge pages, if allowed Kirill A. Shutemov
2017-02-09 23:34 ` Matthew Wilcox
2017-02-10 0:23 ` Andreas Dilger
2017-02-10 14:51 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 12/37] brd: make it handle huge pages Kirill A. Shutemov
2017-02-10 17:24 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 13/37] mm: make write_cache_pages() work on " Kirill A. Shutemov
2017-02-10 17:46 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 14/37] thp: introduce hpage_size() and hpage_mask() Kirill A. Shutemov
2017-01-26 11:57 ` [PATCHv6 15/37] thp: do not threat slab pages as huge in hpage_{nr_pages,size,mask} Kirill A. Shutemov
2017-02-10 22:13 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 16/37] thp: make thp_get_unmapped_area() respect S_HUGE_MODE Kirill A. Shutemov
2017-02-10 17:50 ` Matthew Wilcox
2017-01-26 11:57 ` [PATCHv6 17/37] fs: make block_read_full_page() be able to read huge page Kirill A. Shutemov
2017-02-10 17:58 ` Matthew Wilcox
2017-01-26 11:58 ` [PATCHv6 18/37] fs: make block_write_{begin,end}() be able to handle huge pages Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 19/37] fs: make block_page_mkwrite() aware about " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 20/37] truncate: make truncate_inode_pages_range() " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 21/37] truncate: make invalidate_inode_pages2_range() " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 22/37] mm, hugetlb: switch hugetlbfs to multi-order radix-tree entries Kirill A. Shutemov
2017-01-26 11:58 ` Kirill A. Shutemov [this message]
2017-01-26 11:58 ` [PATCHv6 24/37] ext4: make ext4_mpage_readpages() hugepage-aware Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 25/37] ext4: make ext4_writepage() work on huge pages Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 26/37] ext4: handle huge pages in ext4_page_mkwrite() Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 27/37] ext4: handle huge pages in __ext4_block_zero_page_range() Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 28/37] ext4: make ext4_block_write_begin() aware about huge pages Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 29/37] ext4: handle huge pages in ext4_da_write_end() Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 30/37] ext4: make ext4_da_page_release_reservation() aware about huge pages Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 31/37] ext4: handle writeback with " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 32/37] ext4: make EXT4_IOC_MOVE_EXT work " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 33/37] ext4: fix SEEK_DATA/SEEK_HOLE for " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 34/37] ext4: make fallocate() operations work with " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 35/37] ext4: reserve larger jounral transaction for " Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 36/37] mm, fs, ext4: expand use of page_mapping() and page_to_pgoff() Kirill A. Shutemov
2017-01-26 11:58 ` [PATCHv6 37/37] ext4, vfs: add huge= mount option Kirill A. Shutemov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170126115819.58875-24-kirill.shutemov@linux.intel.com \
--to=kirill.shutemov@linux.intel.com \
--cc=aarcange@redhat.com \
--cc=adilger.kernel@dilger.ca \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@intel.com \
--cc=hughd@google.com \
--cc=jack@suse.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ross.zwisler@linux.intel.com \
--cc=tytso@mit.edu \
--cc=vbabka@suse.cz \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).