All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like reclaim but is lighter than reclaim. it only reuses clean and zero mapcount pages of mapping. for special filesystems using this feature like below:
@ 2016-05-31  9:08 ` zhouxianrong
  0 siblings, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-05-31  9:08 UTC (permalink / raw)
  To: viro
  Cc: linux-fsdevel, linux-mm, zhouxianrong, zhouxiyu, wanghaijun5, yuchao0

From: z00281421 <z00281421@notesmail.huawei.com>

const struct address_space_operations special_aops = {
    ...
	.reuse_mapping_page = generic_reuse_mapping_page,
}

Signed-off-by: z00281421 <z00281421@notesmail.huawei.com>
---
 fs/buffer.c                   |    2 +
 include/linux/fs.h            |    7 ++++
 include/linux/pagemap.h       |   36 ++++++++++++++++
 include/linux/radix-tree.h    |    2 +-
 include/linux/vm_event_item.h |    2 +-
 kernel/sysctl.c               |    9 ++++
 mm/filemap.c                  |   93 +++++++++++++++++++++++++++++++++++++++--
 mm/page-writeback.c           |    6 +++
 mm/vmstat.c                   |    1 +
 9 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 754813a..a212720 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -634,6 +634,8 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree,
+				page_index(page), PAGECACHE_TAG_REUSE);
 	}
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28814..a2e33e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -68,6 +68,7 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern unsigned long sysctl_cache_reuse_ratio;
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -412,6 +413,9 @@ struct address_space_operations {
 	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
 				sector_t *span);
 	void (*swap_deactivate)(struct file *file);
+
+	/* reuse mapping page support */
+	struct page *(*reuse_mapping_page)(struct address_space *, gfp_t);
 };
 
 extern const struct address_space_operations empty_aops;
@@ -497,6 +501,7 @@ struct block_device {
 #define PAGECACHE_TAG_DIRTY	0
 #define PAGECACHE_TAG_WRITEBACK	1
 #define PAGECACHE_TAG_TOWRITE	2
+#define PAGECACHE_TAG_REUSE	3
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
@@ -2762,6 +2767,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
+extern struct page *generic_reuse_mapping_page(struct address_space *mapping,
+		gfp_t gfp_mask);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9735410..c454dbb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -76,6 +76,16 @@ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
 	return mapping_gfp_mask(mapping) & gfp_mask;
 }
 
+static inline struct page *mapping_reuse_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	if (unlikely(mapping_unevictable(mapping)))
+		return NULL;
+	if (mapping->a_ops->reuse_mapping_page)
+		return mapping->a_ops->reuse_mapping_page(mapping, gfp_mask);
+	return NULL;
+}
+
 /*
  * This is non-atomic.  Only to be used before the mapping is activated.
  * Probably needs a barrier...
@@ -201,11 +211,21 @@ static inline struct page *__page_cache_alloc(gfp_t gfp)
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x));
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x));
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x)|__GFP_COLD);
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
@@ -215,6 +235,22 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x)
 				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
 }
 
+static inline struct page *page_cache_alloc_fault(struct address_space *x)
+{
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+}
+
+static inline struct page *page_cache_alloc_reuse(struct address_space *x,
+			     gfp_t gfp)
+{
+	struct page *page;
+
+	page = mapping_reuse_page(x, gfp);
+	if (page)
+		return page;
+	return __page_cache_alloc(gfp);
+}
+
 typedef int filler_t(void *, struct page *);
 
 pgoff_t page_cache_next_hole(struct address_space *mapping,
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index cb4b7e8..2e5aa47 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -64,7 +64,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 /*** radix-tree API starts here ***/
 
-#define RADIX_TREE_MAX_TAGS 3
+#define RADIX_TREE_MAX_TAGS 4
 
 #ifndef RADIX_TREE_MAP_SHIFT
 #define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ec08432..a34b456 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -37,7 +37,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
-		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+		PAGEOUTRUN, ALLOCSTALL, PGROTATED, PGREUSED,
 		DROP_PAGECACHE, DROP_SLAB,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87b2fc3..35e3c7d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1334,6 +1334,15 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "cache_reuse_ratio",
+		.data		= &sysctl_cache_reuse_ratio,
+		.maxlen		= sizeof(sysctl_cache_reuse_ratio),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	{
 		.procname	= "nr_hugepages",
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..f0fed97 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,6 +47,8 @@
 
 #include <asm/mman.h>
 
+unsigned long sysctl_cache_reuse_ratio = 100;
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -110,6 +112,18 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static unsigned long page_cache_over_reuse_limit(void)
+{
+	unsigned long lru_file, limit;
+
+	limit = totalram_pages * sysctl_cache_reuse_ratio / 100;
+	lru_file = global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_FILE);
+	if (lru_file > limit)
+		return lru_file - limit;
+	return 0;
+}
+
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -1194,7 +1208,7 @@ no_page:
 		if (fgp_flags & FGP_NOFS)
 			gfp_mask &= ~__GFP_FS;
 
-		page = __page_cache_alloc(gfp_mask);
+		page = page_cache_alloc_reuse(mapping, gfp_mask);
 		if (!page)
 			return NULL;
 
@@ -1784,6 +1798,13 @@ readpage:
 			unlock_page(page);
 		}
 
+		if (!page_mapcount(page)) {
+			spin_lock_irq(&mapping->tree_lock);
+			radix_tree_tag_set(&mapping->page_tree,
+					index, PAGECACHE_TAG_REUSE);
+			spin_unlock_irq(&mapping->tree_lock);
+		}
+
 		goto page_ok;
 
 readpage_error:
@@ -1899,7 +1920,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 	int ret;
 
 	do {
-		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+		page = page_cache_alloc_fault(mapping, gfp_mask|__GFP_COLD);
 		if (!page)
 			return -ENOMEM;
 
@@ -2270,6 +2291,72 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
+struct page *generic_reuse_mapping_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	int i;
+	pgoff_t index = 0;
+	struct page *p = NULL;
+	struct pagevec pvec;
+
+	if (!page_cache_over_reuse_limit())
+		return NULL;
+	if (unlikely(!mapping->nrpages))
+		return NULL;
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_REUSE))
+		return NULL;
+	lru_add_drain();
+	pagevec_init(&pvec, 0);
+	while (!p && pagevec_lookup_tag(&pvec, mapping, &index,
+		PAGECACHE_TAG_REUSE, PAGEVEC_SIZE)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			if (PageActive(page))
+				continue;
+			if (PageDirty(page))
+				continue;
+			if (page_mapcount(page))
+				continue;
+			if (!trylock_page(page))
+				continue;
+			if (unlikely(page_mapping(page) != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+			if (invalidate_inode_page(page)) {
+				if (likely(!isolate_lru_page(page))) {
+					get_page(page);
+					ClearPageUptodate(page);
+					WARN_ON(TestClearPageDirty(page));
+					WARN_ON(TestClearPageWriteback(page));
+					WARN_ON(TestClearPageActive(page));
+					WARN_ON(TestClearPageUnevictable(page));
+					ClearPageError(page);
+					ClearPageReferenced(page);
+					ClearPageReclaim(page);
+					ClearPageMappedToDisk(page);
+					ClearPageReadahead(page);
+					unlock_page(page);
+					count_vm_event(PGREUSED);
+					p = page;
+					break;
+				}
+			} else {
+				spin_lock_irq(&mapping->tree_lock);
+				radix_tree_tag_clear(&mapping->page_tree,
+					page->index, PAGECACHE_TAG_REUSE);
+				spin_unlock_irq(&mapping->tree_lock);
+			}
+			unlock_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	return p;
+}
+EXPORT_SYMBOL(generic_reuse_mapping_page);
+
 static struct page *wait_on_page_read(struct page *page)
 {
 	if (!IS_ERR(page)) {
@@ -2293,7 +2380,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp | __GFP_COLD);
+		page = page_cache_alloc_reuse(mapping, gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9956fd..0709df3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2490,6 +2490,8 @@ int __set_page_dirty_nobuffers(struct page *page)
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_REUSE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		unlock_page_memcg(page);
 
@@ -2737,6 +2739,10 @@ int test_clear_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
+			if (!PageSwapBacked(page) && !page_mapcount(page))
+				radix_tree_tag_set(&mapping->page_tree,
+							page_index(page),
+							PAGECACHE_TAG_REUSE);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 77e42ef..273cd1d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -773,6 +773,7 @@ const char * const vmstat_text[] = {
 	"allocstall",
 
 	"pgrotated",
+	"pgreused",
 
 	"drop_pagecache",
 	"drop_slab",
-- 
1.7.9.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like reclaim but is lighter than reclaim. it only reuses clean and zero mapcount pages of mapping. for special filesystems using this feature like below:
@ 2016-05-31  9:08 ` zhouxianrong
  0 siblings, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-05-31  9:08 UTC (permalink / raw)
  To: viro
  Cc: linux-fsdevel, linux-mm, zhouxianrong, zhouxiyu, wanghaijun5, yuchao0

From: z00281421 <z00281421@notesmail.huawei.com>

const struct address_space_operations special_aops = {
    ...
	.reuse_mapping_page = generic_reuse_mapping_page,
}

Signed-off-by: z00281421 <z00281421@notesmail.huawei.com>
---
 fs/buffer.c                   |    2 +
 include/linux/fs.h            |    7 ++++
 include/linux/pagemap.h       |   36 ++++++++++++++++
 include/linux/radix-tree.h    |    2 +-
 include/linux/vm_event_item.h |    2 +-
 kernel/sysctl.c               |    9 ++++
 mm/filemap.c                  |   93 +++++++++++++++++++++++++++++++++++++++--
 mm/page-writeback.c           |    6 +++
 mm/vmstat.c                   |    1 +
 9 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 754813a..a212720 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -634,6 +634,8 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree,
+				page_index(page), PAGECACHE_TAG_REUSE);
 	}
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28814..a2e33e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -68,6 +68,7 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern unsigned long sysctl_cache_reuse_ratio;
 
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -412,6 +413,9 @@ struct address_space_operations {
 	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
 				sector_t *span);
 	void (*swap_deactivate)(struct file *file);
+
+	/* reuse mapping page support */
+	struct page *(*reuse_mapping_page)(struct address_space *, gfp_t);
 };
 
 extern const struct address_space_operations empty_aops;
@@ -497,6 +501,7 @@ struct block_device {
 #define PAGECACHE_TAG_DIRTY	0
 #define PAGECACHE_TAG_WRITEBACK	1
 #define PAGECACHE_TAG_TOWRITE	2
+#define PAGECACHE_TAG_REUSE	3
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
@@ -2762,6 +2767,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
+extern struct page *generic_reuse_mapping_page(struct address_space *mapping,
+		gfp_t gfp_mask);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9735410..c454dbb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -76,6 +76,16 @@ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
 	return mapping_gfp_mask(mapping) & gfp_mask;
 }
 
+static inline struct page *mapping_reuse_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	if (unlikely(mapping_unevictable(mapping)))
+		return NULL;
+	if (mapping->a_ops->reuse_mapping_page)
+		return mapping->a_ops->reuse_mapping_page(mapping, gfp_mask);
+	return NULL;
+}
+
 /*
  * This is non-atomic.  Only to be used before the mapping is activated.
  * Probably needs a barrier...
@@ -201,11 +211,21 @@ static inline struct page *__page_cache_alloc(gfp_t gfp)
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x));
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x));
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
+	struct page *page;
+
+	page = mapping_reuse_page(x, mapping_gfp_mask(x)|__GFP_COLD);
+	if (page)
+		return page;
 	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
 }
 
@@ -215,6 +235,22 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x)
 				  __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
 }
 
+static inline struct page *page_cache_alloc_fault(struct address_space *x)
+{
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+}
+
+static inline struct page *page_cache_alloc_reuse(struct address_space *x,
+			     gfp_t gfp)
+{
+	struct page *page;
+
+	page = mapping_reuse_page(x, gfp);
+	if (page)
+		return page;
+	return __page_cache_alloc(gfp);
+}
+
 typedef int filler_t(void *, struct page *);
 
 pgoff_t page_cache_next_hole(struct address_space *mapping,
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index cb4b7e8..2e5aa47 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -64,7 +64,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 /*** radix-tree API starts here ***/
 
-#define RADIX_TREE_MAX_TAGS 3
+#define RADIX_TREE_MAX_TAGS 4
 
 #ifndef RADIX_TREE_MAP_SHIFT
 #define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ec08432..a34b456 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -37,7 +37,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
-		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+		PAGEOUTRUN, ALLOCSTALL, PGROTATED, PGREUSED,
 		DROP_PAGECACHE, DROP_SLAB,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87b2fc3..35e3c7d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1334,6 +1334,15 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "cache_reuse_ratio",
+		.data		= &sysctl_cache_reuse_ratio,
+		.maxlen		= sizeof(sysctl_cache_reuse_ratio),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	{
 		.procname	= "nr_hugepages",
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..f0fed97 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,6 +47,8 @@
 
 #include <asm/mman.h>
 
+unsigned long sysctl_cache_reuse_ratio = 100;
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -110,6 +112,18 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static unsigned long page_cache_over_reuse_limit(void)
+{
+	unsigned long lru_file, limit;
+
+	limit = totalram_pages * sysctl_cache_reuse_ratio / 100;
+	lru_file = global_page_state(NR_ACTIVE_FILE)
+		+ global_page_state(NR_INACTIVE_FILE);
+	if (lru_file > limit)
+		return lru_file - limit;
+	return 0;
+}
+
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -1194,7 +1208,7 @@ no_page:
 		if (fgp_flags & FGP_NOFS)
 			gfp_mask &= ~__GFP_FS;
 
-		page = __page_cache_alloc(gfp_mask);
+		page = page_cache_alloc_reuse(mapping, gfp_mask);
 		if (!page)
 			return NULL;
 
@@ -1784,6 +1798,13 @@ readpage:
 			unlock_page(page);
 		}
 
+		if (!page_mapcount(page)) {
+			spin_lock_irq(&mapping->tree_lock);
+			radix_tree_tag_set(&mapping->page_tree,
+					index, PAGECACHE_TAG_REUSE);
+			spin_unlock_irq(&mapping->tree_lock);
+		}
+
 		goto page_ok;
 
 readpage_error:
@@ -1899,7 +1920,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
 	int ret;
 
 	do {
-		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
+		page = page_cache_alloc_fault(mapping, gfp_mask|__GFP_COLD);
 		if (!page)
 			return -ENOMEM;
 
@@ -2270,6 +2291,72 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
+struct page *generic_reuse_mapping_page(struct address_space *mapping,
+				gfp_t gfp_mask)
+{
+	int i;
+	pgoff_t index = 0;
+	struct page *p = NULL;
+	struct pagevec pvec;
+
+	if (!page_cache_over_reuse_limit())
+		return NULL;
+	if (unlikely(!mapping->nrpages))
+		return NULL;
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_REUSE))
+		return NULL;
+	lru_add_drain();
+	pagevec_init(&pvec, 0);
+	while (!p && pagevec_lookup_tag(&pvec, mapping, &index,
+		PAGECACHE_TAG_REUSE, PAGEVEC_SIZE)) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			if (PageActive(page))
+				continue;
+			if (PageDirty(page))
+				continue;
+			if (page_mapcount(page))
+				continue;
+			if (!trylock_page(page))
+				continue;
+			if (unlikely(page_mapping(page) != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+			if (invalidate_inode_page(page)) {
+				if (likely(!isolate_lru_page(page))) {
+					get_page(page);
+					ClearPageUptodate(page);
+					WARN_ON(TestClearPageDirty(page));
+					WARN_ON(TestClearPageWriteback(page));
+					WARN_ON(TestClearPageActive(page));
+					WARN_ON(TestClearPageUnevictable(page));
+					ClearPageError(page);
+					ClearPageReferenced(page);
+					ClearPageReclaim(page);
+					ClearPageMappedToDisk(page);
+					ClearPageReadahead(page);
+					unlock_page(page);
+					count_vm_event(PGREUSED);
+					p = page;
+					break;
+				}
+			} else {
+				spin_lock_irq(&mapping->tree_lock);
+				radix_tree_tag_clear(&mapping->page_tree,
+					page->index, PAGECACHE_TAG_REUSE);
+				spin_unlock_irq(&mapping->tree_lock);
+			}
+			unlock_page(page);
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	return p;
+}
+EXPORT_SYMBOL(generic_reuse_mapping_page);
+
 static struct page *wait_on_page_read(struct page *page)
 {
 	if (!IS_ERR(page)) {
@@ -2293,7 +2380,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp | __GFP_COLD);
+		page = page_cache_alloc_reuse(mapping, gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b9956fd..0709df3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2490,6 +2490,8 @@ int __set_page_dirty_nobuffers(struct page *page)
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_REUSE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		unlock_page_memcg(page);
 
@@ -2737,6 +2739,10 @@ int test_clear_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
+			if (!PageSwapBacked(page) && !page_mapcount(page))
+				radix_tree_tag_set(&mapping->page_tree,
+							page_index(page),
+							PAGECACHE_TAG_REUSE);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 77e42ef..273cd1d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -773,6 +773,7 @@ const char * const vmstat_text[] = {
 	"allocstall",
 
 	"pgrotated",
+	"pgreused",
 
 	"drop_pagecache",
 	"drop_slab",
-- 
1.7.9.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like reclaim but is lighter than reclaim. it only reuses clean and zero mapcount pages of mapping. for special filesystems using this feature like below:
  2016-05-31  9:08 ` zhouxianrong
  (?)
@ 2016-05-31  9:36 ` Michal Hocko
  2016-05-31 13:35   ` 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like zhouxianrong
  -1 siblings, 1 reply; 13+ messages in thread
From: Michal Hocko @ 2016-05-31  9:36 UTC (permalink / raw)
  To: zhouxianrong
  Cc: viro, linux-fsdevel, linux-mm, zhouxiyu, wanghaijun5, yuchao0

On Tue 31-05-16 17:08:22, zhouxianrong@huawei.com wrote:
> From: z00281421 <z00281421@notesmail.huawei.com>
> 
> const struct address_space_operations special_aops = {
>     ...
> 	.reuse_mapping_page = generic_reuse_mapping_page,
> }

Please try to write a proper changelog which explains what is the
change, why do we need it and who is it going to use.
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-05-31  9:36 ` Michal Hocko
@ 2016-05-31 13:35   ` zhouxianrong
  2016-05-31 14:03     ` Michal Hocko
  0 siblings, 1 reply; 13+ messages in thread
From: zhouxianrong @ 2016-05-31 13:35 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

Hey :
	the idea of this patch is to reuse an existing mapping page rather than allocate a new page for the page cache when the system is in certain states.
To look up such pages quickly, a new radix tree tag, PAGECACHE_TAG_REUSE, is added to mark the pages that are suitable for reusing.

A page within a mapping is suitable for reusing when
1. it is clean
2. its map count is zero
3. its mapping is evictable

A page is tagged with PAGECACHE_TAG_REUSE when
1. writeback of the page finishes: end_page_writeback sets PAGECACHE_TAG_REUSE on the page
2. the page goes through the generic vfs read path, which sets PAGECACHE_TAG_REUSE by default

PAGECACHE_TAG_REUSE is cleared from a page when
1. the page becomes dirty within __set_page_dirty and __set_page_dirty_nobuffers (a dirty page is not suitable for reusing)

The steps of reusing a page are
1. invalidate the page with invalidate_inode_page (remove it from the mapping)
2. isolate it from the lru list with isolate_lru_page
3. clear the page's uptodate and other page flags

When this functionality kicks in
1. the system is in a low memory state and there are fs rw operations
2. the page cache size has grown beyond the sysctl limit

If a filesystem wants to use this functionality, for example ext4, it hooks it up as below:

static const struct address_space_operations ext4_aops = {
	...
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_write_end,
	...
	.reuse_mapping_page = generic_reuse_mapping_page,
};

-----邮件原件-----
发件人: Michal Hocko [mailto:mhocko@kernel.org] 
发送时间: 2016年5月31日 17:37
收件人: zhouxianrong
抄送: viro@zeniv.linux.org.uk; linux-fsdevel@vger.kernel.org; linux-mm@kvack.org; Zhouxiyu; wanghaijun (E); Yuchao (T)
主题: Re: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...

On Tue 31-05-16 17:08:22, zhouxianrong@huawei.com wrote:
> From: z00281421 <z00281421@notesmail.huawei.com>
> 
> const struct address_space_operations special_aops = {
>     ...
> 	.reuse_mapping_page = generic_reuse_mapping_page, }

Please try to write a proper changelog which explains what is the change, why do we need it and who is it going to use.
--
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-05-31 13:35   ` 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like zhouxianrong
@ 2016-05-31 14:03     ` Michal Hocko
  2016-06-01  1:52       ` zhouxianrong
  0 siblings, 1 reply; 13+ messages in thread
From: Michal Hocko @ 2016-05-31 14:03 UTC (permalink / raw)
  To: zhouxianrong
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

On Tue 31-05-16 13:35:37, zhouxianrong wrote:
> Hey :
> the consideration of this patch is that reusing mapping page
> rather than allocating a new page for page cache when system be
> placed in some states.  For lookup pages quickly add a new tag
> PAGECACHE_TAG_REUSE for radix tree which tag the pages that is
> suitable for reusing.
> 
> A page suitable for reusing within mapping is
> 1. clean
> 2. map count is zero
> 3. whose mapping is evictable

Those pages are trivially reclaimable so why should we tag them in a
special way?

[...]

> How to startup the functional
> 1. the system is under low memory state and there are fs rw operations
> 2. page cache size is get bigger over sysctl limit

So is this a form of a page cache limit to trigger the reclaim earlier
than on the global memory pressure?
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-05-31 14:03     ` Michal Hocko
@ 2016-06-01  1:52       ` zhouxianrong
  2016-06-01  8:18         ` Michal Hocko
  0 siblings, 1 reply; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  1:52 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

 >> A page suitable for reusing within mapping is
 >> 1. clean
 >> 2. map count is zero
 >> 3. whose mapping is evictable
 >
 > Those pages are trivially reclaimable so why should we tag them in a
 > special way?
   yes, those pages can be reclaimed by the reclaim procedure. i think that in the low
   memory case, for a process doing file rw, directly reusing a mapping page may be
   a better choice than alloc_page, just like direct reclaim. alloc_page could return
   failure due to gfp flags and watermarks in low memory. the reuse-mapping-page
   procedure quickly selects a page to be reused, so a tag is introduced for this purpose.


 > So is this a form of a page cache limit to trigger the reclaim earlier
 > than on the global memory pressure?
   my thinking is that the page cache limit triggers reuse-mapping-page. the limit takes effect earlier than global memory pressure.
   reuse-mapping-page can suppress the growth of the page cache size, and a big page cache size
   is one cause of low memory and fragmentation.

On 2016/5/31 22:03, Michal Hocko wrote:
> On Tue 31-05-16 13:35:37, zhouxianrong wrote:
>> Hey :
>> the consideration of this patch is that reusing mapping page
>> rather than allocating a new page for page cache when system be
>> placed in some states.  For lookup pages quickly add a new tag
>> PAGECACHE_TAG_REUSE for radix tree which tag the pages that is
>> suitable for reusing.
>>
>> A page suitable for reusing within mapping is
>> 1. clean
>> 2. map count is zero
>> 3. whose mapping is evictable
>
> Those pages are trivially reclaimable so why should we tag them in a
> special way?
>
> [...]
>
>> How to startup the functional
>> 1. the system is under low memory state and there are fs rw operations
>> 2. page cache size is get bigger over sysctl limit
>
> So is this a form of a page cache limit to trigger the reclaim earlier
> than on the global memory pressure?
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-06-01  1:52       ` zhouxianrong
@ 2016-06-01  8:18         ` Michal Hocko
  2016-06-01  9:06             ` zhouxianrong
  2016-06-01  9:28             ` zhouxianrong
  0 siblings, 2 replies; 13+ messages in thread
From: Michal Hocko @ 2016-06-01  8:18 UTC (permalink / raw)
  To: zhouxianrong
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

On Wed 01-06-16 09:52:45, zhouxianrong wrote:
> >> A page suitable for reusing within mapping is
> >> 1. clean
> >> 2. map count is zero
> >> 3. whose mapping is evictable
> >
> > Those pages are trivially reclaimable so why should we tag them in a
> > special way?
> yes, those pages can be reclaimed by reclaim procedure. i think in low memory
> case for a process that doing file rw directly-reusing-mapping-page may be
> a choice than alloc_page just like directly reclaim. alloc_page could failed return
> due to gfp and watermark in low memory. for reusing-mapping-page procedure quickly
> select a page that is be reused so introduce a tag for this purpose.

Why would you want to reuse a page about which you have no idea about
its age compared to the LRU pages which would be mostly clean as well?
I mean this needs a deep justification!

> > So is this a form of a page cache limit to trigger the reclaim earlier
> > than on the global memory pressure?

> my thinking is that page cache limit trigger reuse-mapping-page. the
> limit is earlier than on the global memory pressure.
> reuse-mapping-page can suppress increment of page cache size and big page cache size
> is one reason of low memory and fragment.

But why would you want to limit the amount of the page cache in the
first place when it should be trivially reclaimable most of the time?
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-06-01  8:18         ` Michal Hocko
@ 2016-06-01  9:06             ` zhouxianrong
  2016-06-01  9:28             ` zhouxianrong
  1 sibling, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  9:06 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

 > Why would you want to reuse a page about which you have no idea about
 > its age compared to the LRU pages which would be mostly clean as well?
 > I mean this needs a deep justification!

reusing cannot reuse a page with page_mapcount > 0; it only reuses a pure file page without mmap.
only file pages produced by the rw syscall can be reused; so there is no AF consideration for it and it just uses
the active/inactive flag to distinguish.

On 2016/6/1 16:18, Michal Hocko wrote:
> On Wed 01-06-16 09:52:45, zhouxianrong wrote:
>>>> A page suitable for reusing within mapping is
>>>> 1. clean
>>>> 2. map count is zero
>>>> 3. whose mapping is evictable
>>>
>>> Those pages are trivially reclaimable so why should we tag them in a
>>> special way?
>> yes, those pages can be reclaimed by reclaim procedure. i think in low memory
>> case for a process that doing file rw directly-reusing-mapping-page may be
>> a choice than alloc_page just like directly reclaim. alloc_page could failed return
>> due to gfp and watermark in low memory. for reusing-mapping-page procedure quickly
>> select a page that is be reused so introduce a tag for this purpose.
>
> Why would you want to reuse a page about which you have no idea about
> its age compared to the LRU pages which would be mostly clean as well?
> I mean this needs a deep justification!
>
>>> So is this a form of a page cache limit to trigger the reclaim earlier
>>> than on the global memory pressure?
>
>> my thinking is that page cache limit trigger reuse-mapping-page. the
>> limit is earlier than on the global memory pressure.
>> reuse-mapping-page can suppress increment of page cache size and big page cache size
>> is one reason of low memory and fragment.
>
> But why would you want to limit the amount of the page cache in the
> first place when it should be trivially reclaimable most of the time?
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
@ 2016-06-01  9:06             ` zhouxianrong
  0 siblings, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  9:06 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

 > Why would you want to reuse a page about which you have no idea about
 > its age compared to the LRU pages which would be mostly clean as well?
 > I mean this needs a deep justification!

reusing cannot reuse a page with page_mapcount > 0; it only reuses a pure file page without mmap.
only file pages produced by the rw syscall can be reused; so there is no AF consideration for it and it just uses
the active/inactive flag to distinguish.

On 2016/6/1 16:18, Michal Hocko wrote:
> On Wed 01-06-16 09:52:45, zhouxianrong wrote:
>>>> A page suitable for reusing within mapping is
>>>> 1. clean
>>>> 2. map count is zero
>>>> 3. whose mapping is evictable
>>>
>>> Those pages are trivially reclaimable so why should we tag them in a
>>> special way?
>> yes, those pages can be reclaimed by reclaim procedure. i think in low memory
>> case for a process that doing file rw directly-reusing-mapping-page may be
>> a choice than alloc_page just like directly reclaim. alloc_page could failed return
>> due to gfp and watermark in low memory. for reusing-mapping-page procedure quickly
>> select a page that is be reused so introduce a tag for this purpose.
>
> Why would you want to reuse a page about which you have no idea about
> its age compared to the LRU pages which would be mostly clean as well?
> I mean this needs a deep justification!
>
>>> So is this a form of a page cache limit to trigger the reclaim earlier
>>> than on the global memory pressure?
>
>> my thinking is that page cache limit trigger reuse-mapping-page. the
>> limit is earlier than on the global memory pressure.
>> reuse-mapping-page can suppress increment of page cache size and big page cache size
>> is one reason of low memory and fragment.
>
> But why would you want to limit the amount of the page cache in the
> first place when it should be trivially reclaimable most of the time?
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-06-01  8:18         ` Michal Hocko
@ 2016-06-01  9:28             ` zhouxianrong
  2016-06-01  9:28             ` zhouxianrong
  1 sibling, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  9:28 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

 > Why would you want to reuse a page about which you have no idea about
 > its age compared to the LRU pages which would be mostly clean as well?
 > I mean this needs a deep justification!
 >

only pages dirtied by the rw syscall can be reused;
pages dirtied by a fault, due to page_mkwrite or bdi dirty accounting, are currently not reused.

On 2016/6/1 16:18, Michal Hocko wrote:
> On Wed 01-06-16 09:52:45, zhouxianrong wrote:
>>>> A page suitable for reusing within mapping is
>>>> 1. clean
>>>> 2. map count is zero
>>>> 3. whose mapping is evictable
>>>
>>> Those pages are trivially reclaimable so why should we tag them in a
>>> special way?
>> yes, those pages can be reclaimed by reclaim procedure. i think in low memory
>> case for a process that doing file rw directly-reusing-mapping-page may be
>> a choice than alloc_page just like directly reclaim. alloc_page could failed return
>> due to gfp and watermark in low memory. for reusing-mapping-page procedure quickly
>> select a page that is be reused so introduce a tag for this purpose.
>
> Why would you want to reuse a page about which you have no idea about
> its age compared to the LRU pages which would be mostly clean as well?
> I mean this needs a deep justification!
>
>>> So is this a form of a page cache limit to trigger the reclaim earlier
>>> than on the global memory pressure?
>
>> my thinking is that page cache limit trigger reuse-mapping-page. the
>> limit is earlier than on the global memory pressure.
>> reuse-mapping-page can suppress increment of page cache size and big page cache size
>> is one reason of low memory and fragment.
>
> But why would you want to limit the amount of the page cache in the
> first place when it should be trivially reclaimable most of the time?
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
@ 2016-06-01  9:28             ` zhouxianrong
  0 siblings, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  9:28 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

 > Why would you want to reuse a page about which you have no idea about
 > its age compared to the LRU pages which would be mostly clean as well?
 > I mean this needs a deep justification!
 >

only pages dirtied by the rw syscall can be reused;
pages dirtied by a fault, due to page_mkwrite or bdi dirty accounting, are currently not reused.

On 2016/6/1 16:18, Michal Hocko wrote:
> On Wed 01-06-16 09:52:45, zhouxianrong wrote:
>>>> A page suitable for reusing within mapping is
>>>> 1. clean
>>>> 2. map count is zero
>>>> 3. whose mapping is evictable
>>>
>>> Those pages are trivially reclaimable so why should we tag them in a
>>> special way?
>> yes, those pages can be reclaimed by reclaim procedure. i think in low memory
>> case for a process that doing file rw directly-reusing-mapping-page may be
>> a choice than alloc_page just like directly reclaim. alloc_page could failed return
>> due to gfp and watermark in low memory. for reusing-mapping-page procedure quickly
>> select a page that is be reused so introduce a tag for this purpose.
>
> Why would you want to reuse a page about which you have no idea about
> its age compared to the LRU pages which would be mostly clean as well?
> I mean this needs a deep justification!
>
>>> So is this a form of a page cache limit to trigger the reclaim earlier
>>> than on the global memory pressure?
>
>> my thinking is that page cache limit trigger reuse-mapping-page. the
>> limit is earlier than on the global memory pressure.
>> reuse-mapping-page can suppress increment of page cache size and big page cache size
>> is one reason of low memory and fragment.
>
> But why would you want to limit the amount of the page cache in the
> first place when it should be trivially reclaimable most of the time?
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-06-01  9:06             ` zhouxianrong
  (?)
@ 2016-06-01  9:49             ` Michal Hocko
  2016-06-01  9:56               ` zhouxianrong
  -1 siblings, 1 reply; 13+ messages in thread
From: Michal Hocko @ 2016-06-01  9:49 UTC (permalink / raw)
  To: zhouxianrong
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

On Wed 01-06-16 17:06:09, zhouxianrong wrote:
> > Why would you want to reuse a page about which you have no idea about
> > its age compared to the LRU pages which would be mostly clean as well?
> > I mean this needs a deep justification!
> 
> reusing could not reuse page with page_mapcount > 0; it only reuse
> a pure file page without mmap. only file pages producted by rw
> syscall can be reuse; so no AF consideration for it and just use
> active/inactive flag to distinguish.

I am sorry but I do not follow.
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like...
  2016-06-01  9:49             ` Michal Hocko
@ 2016-06-01  9:56               ` zhouxianrong
  0 siblings, 0 replies; 13+ messages in thread
From: zhouxianrong @ 2016-06-01  9:56 UTC (permalink / raw)
  To: Michal Hocko
  Cc: viro, linux-fsdevel, linux-mm, Zhouxiyu, wanghaijun (E), Yuchao (T)

ok, the idea is not better, thank you

On 2016/6/1 17:49, Michal Hocko wrote:
> On Wed 01-06-16 17:06:09, zhouxianrong wrote:
>>> Why would you want to reuse a page about which you have no idea about
>>> its age compared to the LRU pages which would be mostly clean as well?
>>> I mean this needs a deep justification!
>>
>> reusing could not reuse page with page_mapcount > 0; it only reuse
>> a pure file page without mmap. only file pages producted by rw
>> syscall can be reuse; so no AF consideration for it and just use
>> active/inactive flag to distinguish.
>
> I am sorry but I do not follow.
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-01  9:56 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-31  9:08 [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like relclaim but is lighter than reclaim. it only reuses clean and zero mapcount pages of mapping. for special filesystems using this feature like below: zhouxianrong
2016-05-31  9:08 ` zhouxianrong
2016-05-31  9:36 ` Michal Hocko
2016-05-31 13:35   ` 答复: [PATCH] reusing of mapping page supplies a way for file page allocation under low memory due to pagecache over size and is controlled by sysctl parameters. it is used only for rw page allocation rather than fault or readahead allocation. it is like zhouxianrong
2016-05-31 14:03     ` Michal Hocko
2016-06-01  1:52       ` zhouxianrong
2016-06-01  8:18         ` Michal Hocko
2016-06-01  9:06           ` zhouxianrong
2016-06-01  9:06             ` zhouxianrong
2016-06-01  9:49             ` Michal Hocko
2016-06-01  9:56               ` zhouxianrong
2016-06-01  9:28           ` zhouxianrong
2016-06-01  9:28             ` zhouxianrong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.