All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] erofs: force inplace I/O under low memory scenario
       [not found] <20201208054600.16302-1-hsiangkao.ref@aol.com>
@ 2020-12-08  5:46   ` Gao Xiang via Linux-erofs
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2020-12-08  5:46 UTC (permalink / raw)
  To: linux-erofs, Chao Yu; +Cc: LKML, Chao Yu, Gao Xiang

From: Gao Xiang <hsiangkao@redhat.com>

Try to forcibly switch to inplace I/O in low-memory scenarios in
order to avoid direct memory reclaim due to cached page allocation.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
This has been used commercially (internally) for years, but due to the
previously customized page->mapping, it could not be cleanly upstreamed
until now. Since the magical page->mapping is now gone, adapt this to
the latest dev branch for better low-memory performance (fully using
inplace I/O instead).

 fs/erofs/compress.h |  3 +++
 fs/erofs/zdata.c    | 49 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 2bbf47f353ef..c51a741a1232 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -27,11 +27,13 @@ struct z_erofs_decompress_req {
 };
 
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
+#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
 
 /*
  * For all pages in a pcluster, page->private should be one of
  * Type                         Last 2bits      page->private
  * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
+ * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
  * cached/managed page          00              pointer to z_erofs_pcluster
  * online page (file-backed,    01/10/11        sub-index << 2 | count
  *              some pages can be used for inplace I/O)
@@ -39,6 +41,7 @@ struct z_erofs_decompress_req {
  * page->mapping should be one of
  * Type                 page->mapping
  * short-lived page     NULL
+ * preallocated page    NULL
  * cached/managed page  non-NULL or NULL (invalidated/truncated page)
  * online page          non-NULL
  *
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index b1b6cd03046f..b84e6a2fb00c 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -20,6 +20,11 @@
 enum z_erofs_cache_alloctype {
 	DONTALLOC,	/* don't allocate any cached pages */
 	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
+	/*
+	 * try to use cached I/O if page allocation succeeds or fallback
+	 * to in-place I/O instead to avoid any direct reclaim.
+	 */
+	TRYALLOC,
 };
 
 /*
@@ -154,13 +159,15 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 
 static void preload_compressed_pages(struct z_erofs_collector *clt,
 				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type)
+				     enum z_erofs_cache_alloctype type,
+				     struct list_head *pagepool)
 {
 	const struct z_erofs_pcluster *pcl = clt->pcl;
 	const unsigned int clusterpages = BIT(pcl->clusterbits);
 	struct page **pages = clt->compressedpages;
 	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
 	bool standalone = true;
+	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;
 
 	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
 		return;
@@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
 		struct page *page;
 		compressed_page_t t;
+		struct page *newpage = NULL;
 
 		/* the compressed page was loaded before */
 		if (READ_ONCE(*pages))
@@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 			t = tag_compressed_page_justfound(page);
 		} else if (type == DELAYEDALLOC) {
 			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+		} else if (type == TRYALLOC) {
+			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
+
+			newpage = erofs_allocpage(pagepool, gfp);
+			if (!newpage)
+				goto dontalloc;
+
+			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+			t = tag_compressed_page_justfound(newpage);
 		} else {	/* DONTALLOC */
+dontalloc:
 			if (standalone)
 				clt->compressedpages = pages;
 			standalone = false;
@@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
 			continue;
 
-		if (page)
+		if (page) {
 			put_page(page);
+		} else if (newpage) {
+			set_page_private(newpage, 0);
+			list_add(&newpage->lru, pagepool);
+		}
 	}
 
 	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-				struct page *page)
+				struct page *page, struct list_head *pagepool)
 {
 	struct inode *const inode = fe->inode;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 
 	/* preload all compressed pages (maybe downgrade role if necessary) */
 	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
-		cache_strategy = DELAYEDALLOC;
+		cache_strategy = TRYALLOC;
 	else
 		cache_strategy = DONTALLOC;
 
-	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
+	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+				 cache_strategy, pagepool);
 
 hitted:
 	/*
@@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 	justfound = tagptr_unfold_tags(t);
 	page = tagptr_unfold_ptr(t);
 
+	/*
+	 * preallocated cached pages, which is used to avoid direct reclaim
+	 * otherwise, it will go inplace I/O path instead.
+	 */
+	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
+		WRITE_ONCE(pcl->compressed_pages[nr], page);
+		set_page_private(page, 0);
+		tocache = true;
+		goto out_tocache;
+	}
 	mapping = READ_ONCE(page->mapping);
 
 	/*
@@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 		cond_resched();
 		goto repeat;
 	}
-
+out_tocache:
 	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
 		/* turn into temporary page if fails */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
@@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
 
 	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
-	err = z_erofs_do_read_page(&f, page);
+	err = z_erofs_do_read_page(&f, page, &pagepool);
 	(void)z_erofs_collector_end(&f.clt);
 
 	/* if some compressed cluster ready, need submit them anyway */
@@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		/* traversal in reverse order */
 		head = (void *)page_private(page);
 
-		err = z_erofs_do_read_page(&f, page);
+		err = z_erofs_do_read_page(&f, page, &pagepool);
 		if (err)
 			erofs_err(inode->i_sb,
 				  "readahead error at page %lu @ nid %llu",
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] erofs: force inplace I/O under low memory scenario
@ 2020-12-08  5:46   ` Gao Xiang via Linux-erofs
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang via Linux-erofs @ 2020-12-08  5:46 UTC (permalink / raw)
  To: linux-erofs, Chao Yu; +Cc: LKML

From: Gao Xiang <hsiangkao@redhat.com>

Try to forcibly switch to inplace I/O in low-memory scenarios in
order to avoid direct memory reclaim due to cached page allocation.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
This has been used commercially (internally) for years, but due to the
previously customized page->mapping, it could not be cleanly upstreamed
until now. Since the magical page->mapping is now gone, adapt this to
the latest dev branch for better low-memory performance (fully using
inplace I/O instead).

 fs/erofs/compress.h |  3 +++
 fs/erofs/zdata.c    | 49 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 2bbf47f353ef..c51a741a1232 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -27,11 +27,13 @@ struct z_erofs_decompress_req {
 };
 
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
+#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
 
 /*
  * For all pages in a pcluster, page->private should be one of
  * Type                         Last 2bits      page->private
  * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
+ * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
  * cached/managed page          00              pointer to z_erofs_pcluster
  * online page (file-backed,    01/10/11        sub-index << 2 | count
  *              some pages can be used for inplace I/O)
@@ -39,6 +41,7 @@ struct z_erofs_decompress_req {
  * page->mapping should be one of
  * Type                 page->mapping
  * short-lived page     NULL
+ * preallocated page    NULL
  * cached/managed page  non-NULL or NULL (invalidated/truncated page)
  * online page          non-NULL
  *
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index b1b6cd03046f..b84e6a2fb00c 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -20,6 +20,11 @@
 enum z_erofs_cache_alloctype {
 	DONTALLOC,	/* don't allocate any cached pages */
 	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
+	/*
+	 * try to use cached I/O if page allocation succeeds or fallback
+	 * to in-place I/O instead to avoid any direct reclaim.
+	 */
+	TRYALLOC,
 };
 
 /*
@@ -154,13 +159,15 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 
 static void preload_compressed_pages(struct z_erofs_collector *clt,
 				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type)
+				     enum z_erofs_cache_alloctype type,
+				     struct list_head *pagepool)
 {
 	const struct z_erofs_pcluster *pcl = clt->pcl;
 	const unsigned int clusterpages = BIT(pcl->clusterbits);
 	struct page **pages = clt->compressedpages;
 	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
 	bool standalone = true;
+	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;
 
 	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
 		return;
@@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
 		struct page *page;
 		compressed_page_t t;
+		struct page *newpage = NULL;
 
 		/* the compressed page was loaded before */
 		if (READ_ONCE(*pages))
@@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 			t = tag_compressed_page_justfound(page);
 		} else if (type == DELAYEDALLOC) {
 			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+		} else if (type == TRYALLOC) {
+			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
+
+			newpage = erofs_allocpage(pagepool, gfp);
+			if (!newpage)
+				goto dontalloc;
+
+			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+			t = tag_compressed_page_justfound(newpage);
 		} else {	/* DONTALLOC */
+dontalloc:
 			if (standalone)
 				clt->compressedpages = pages;
 			standalone = false;
@@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
 			continue;
 
-		if (page)
+		if (page) {
 			put_page(page);
+		} else if (newpage) {
+			set_page_private(newpage, 0);
+			list_add(&newpage->lru, pagepool);
+		}
 	}
 
 	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-				struct page *page)
+				struct page *page, struct list_head *pagepool)
 {
 	struct inode *const inode = fe->inode;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 
 	/* preload all compressed pages (maybe downgrade role if necessary) */
 	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
-		cache_strategy = DELAYEDALLOC;
+		cache_strategy = TRYALLOC;
 	else
 		cache_strategy = DONTALLOC;
 
-	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
+	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+				 cache_strategy, pagepool);
 
 hitted:
 	/*
@@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 	justfound = tagptr_unfold_tags(t);
 	page = tagptr_unfold_ptr(t);
 
+	/*
+	 * preallocated cached pages, which is used to avoid direct reclaim
+	 * otherwise, it will go inplace I/O path instead.
+	 */
+	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
+		WRITE_ONCE(pcl->compressed_pages[nr], page);
+		set_page_private(page, 0);
+		tocache = true;
+		goto out_tocache;
+	}
 	mapping = READ_ONCE(page->mapping);
 
 	/*
@@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 		cond_resched();
 		goto repeat;
 	}
-
+out_tocache:
 	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
 		/* turn into temporary page if fails */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
@@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
 
 	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
-	err = z_erofs_do_read_page(&f, page);
+	err = z_erofs_do_read_page(&f, page, &pagepool);
 	(void)z_erofs_collector_end(&f.clt);
 
 	/* if some compressed cluster ready, need submit them anyway */
@@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		/* traversal in reverse order */
 		head = (void *)page_private(page);
 
-		err = z_erofs_do_read_page(&f, page);
+		err = z_erofs_do_read_page(&f, page, &pagepool);
 		if (err)
 			erofs_err(inode->i_sb,
 				  "readahead error at page %lu @ nid %llu",
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] erofs: force inplace I/O under low memory scenario
  2020-12-08  5:46   ` Gao Xiang via Linux-erofs
@ 2020-12-09 10:07     ` Chao Yu
  -1 siblings, 0 replies; 8+ messages in thread
From: Chao Yu @ 2020-12-09 10:07 UTC (permalink / raw)
  To: Gao Xiang, linux-erofs; +Cc: LKML, Chao Yu, Gao Xiang

On 2020/12/8 13:46, Gao Xiang wrote:
> From: Gao Xiang <hsiangkao@redhat.com>
> 
> Try to forcibly switch to inplace I/O in low-memory scenarios in
> order to avoid direct memory reclaim due to cached page allocation.
> 
> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> ---
> This has been used commercially (internally) for years, but due to the
> previously customized page->mapping, it could not be cleanly upstreamed
> until now. Since the magical page->mapping is now gone, adapt this to
> the latest dev branch for better low-memory performance (fully using
> inplace I/O instead).
> 
>   fs/erofs/compress.h |  3 +++
>   fs/erofs/zdata.c    | 49 +++++++++++++++++++++++++++++++++++++--------
>   2 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
> index 2bbf47f353ef..c51a741a1232 100644
> --- a/fs/erofs/compress.h
> +++ b/fs/erofs/compress.h
> @@ -27,11 +27,13 @@ struct z_erofs_decompress_req {
>   };
>   
>   #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
> +#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
>   
>   /*
>    * For all pages in a pcluster, page->private should be one of
>    * Type                         Last 2bits      page->private
>    * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
> + * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
>    * cached/managed page          00              pointer to z_erofs_pcluster
>    * online page (file-backed,    01/10/11        sub-index << 2 | count
>    *              some pages can be used for inplace I/O)
> @@ -39,6 +41,7 @@ struct z_erofs_decompress_req {
>    * page->mapping should be one of
>    * Type                 page->mapping
>    * short-lived page     NULL
> + * preallocated page    NULL
>    * cached/managed page  non-NULL or NULL (invalidated/truncated page)
>    * online page          non-NULL
>    *
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index b1b6cd03046f..b84e6a2fb00c 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -20,6 +20,11 @@
>   enum z_erofs_cache_alloctype {
>   	DONTALLOC,	/* don't allocate any cached pages */
>   	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
> +	/*
> +	 * try to use cached I/O if page allocation succeeds or fallback
> +	 * to in-place I/O instead to avoid any direct reclaim.
> +	 */
> +	TRYALLOC,
>   };
>   
>   /*
> @@ -154,13 +159,15 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
>   
>   static void preload_compressed_pages(struct z_erofs_collector *clt,
>   				     struct address_space *mc,
> -				     enum z_erofs_cache_alloctype type)
> +				     enum z_erofs_cache_alloctype type,
> +				     struct list_head *pagepool)
>   {
>   	const struct z_erofs_pcluster *pcl = clt->pcl;
>   	const unsigned int clusterpages = BIT(pcl->clusterbits);
>   	struct page **pages = clt->compressedpages;
>   	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
>   	bool standalone = true;
> +	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;

It could be made local, as there is only one place that uses it.

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

>   
>   	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
>   		return;
> @@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
>   		struct page *page;
>   		compressed_page_t t;
> +		struct page *newpage = NULL;
>   
>   		/* the compressed page was loaded before */
>   		if (READ_ONCE(*pages))
> @@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   			t = tag_compressed_page_justfound(page);
>   		} else if (type == DELAYEDALLOC) {
>   			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
> +		} else if (type == TRYALLOC) {
> +			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
> +
> +			newpage = erofs_allocpage(pagepool, gfp);
> +			if (!newpage)
> +				goto dontalloc;
> +
> +			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
> +			t = tag_compressed_page_justfound(newpage);
>   		} else {	/* DONTALLOC */
> +dontalloc:
>   			if (standalone)
>   				clt->compressedpages = pages;
>   			standalone = false;
> @@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
>   			continue;
>   
> -		if (page)
> +		if (page) {
>   			put_page(page);
> +		} else if (newpage) {
> +			set_page_private(newpage, 0);
> +			list_add(&newpage->lru, pagepool);
> +		}
>   	}
>   
>   	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
> @@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
>   }
>   
>   static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> -				struct page *page)
> +				struct page *page, struct list_head *pagepool)
>   {
>   	struct inode *const inode = fe->inode;
>   	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
> @@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>   
>   	/* preload all compressed pages (maybe downgrade role if necessary) */
>   	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
> -		cache_strategy = DELAYEDALLOC;
> +		cache_strategy = TRYALLOC;
>   	else
>   		cache_strategy = DONTALLOC;
>   
> -	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
> +	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> +				 cache_strategy, pagepool);
>   
>   hitted:
>   	/*
> @@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>   	justfound = tagptr_unfold_tags(t);
>   	page = tagptr_unfold_ptr(t);
>   
> +	/*
> +	 * preallocated cached pages, which is used to avoid direct reclaim
> +	 * otherwise, it will go inplace I/O path instead.
> +	 */
> +	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
> +		WRITE_ONCE(pcl->compressed_pages[nr], page);
> +		set_page_private(page, 0);
> +		tocache = true;
> +		goto out_tocache;
> +	}
>   	mapping = READ_ONCE(page->mapping);
>   
>   	/*
> @@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>   		cond_resched();
>   		goto repeat;
>   	}
> -
> +out_tocache:
>   	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
>   		/* turn into temporary page if fails */
>   		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> @@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
>   
>   	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
>   
> -	err = z_erofs_do_read_page(&f, page);
> +	err = z_erofs_do_read_page(&f, page, &pagepool);
>   	(void)z_erofs_collector_end(&f.clt);
>   
>   	/* if some compressed cluster ready, need submit them anyway */
> @@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
>   		/* traversal in reverse order */
>   		head = (void *)page_private(page);
>   
> -		err = z_erofs_do_read_page(&f, page);
> +		err = z_erofs_do_read_page(&f, page, &pagepool);
>   		if (err)
>   			erofs_err(inode->i_sb,
>   				  "readahead error at page %lu @ nid %llu",
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] erofs: force inplace I/O under low memory scenario
@ 2020-12-09 10:07     ` Chao Yu
  0 siblings, 0 replies; 8+ messages in thread
From: Chao Yu @ 2020-12-09 10:07 UTC (permalink / raw)
  To: Gao Xiang, linux-erofs; +Cc: LKML

On 2020/12/8 13:46, Gao Xiang wrote:
> From: Gao Xiang <hsiangkao@redhat.com>
> 
> Try to forcibly switch to inplace I/O in low-memory scenarios in
> order to avoid direct memory reclaim due to cached page allocation.
> 
> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> ---
> This has been used commercially (internally) for years, but due to the
> previously customized page->mapping, it could not be cleanly upstreamed
> until now. Since the magical page->mapping is now gone, adapt this to
> the latest dev branch for better low-memory performance (fully using
> inplace I/O instead).
> 
>   fs/erofs/compress.h |  3 +++
>   fs/erofs/zdata.c    | 49 +++++++++++++++++++++++++++++++++++++--------
>   2 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
> index 2bbf47f353ef..c51a741a1232 100644
> --- a/fs/erofs/compress.h
> +++ b/fs/erofs/compress.h
> @@ -27,11 +27,13 @@ struct z_erofs_decompress_req {
>   };
>   
>   #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
> +#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
>   
>   /*
>    * For all pages in a pcluster, page->private should be one of
>    * Type                         Last 2bits      page->private
>    * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
> + * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
>    * cached/managed page          00              pointer to z_erofs_pcluster
>    * online page (file-backed,    01/10/11        sub-index << 2 | count
>    *              some pages can be used for inplace I/O)
> @@ -39,6 +41,7 @@ struct z_erofs_decompress_req {
>    * page->mapping should be one of
>    * Type                 page->mapping
>    * short-lived page     NULL
> + * preallocated page    NULL
>    * cached/managed page  non-NULL or NULL (invalidated/truncated page)
>    * online page          non-NULL
>    *
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index b1b6cd03046f..b84e6a2fb00c 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -20,6 +20,11 @@
>   enum z_erofs_cache_alloctype {
>   	DONTALLOC,	/* don't allocate any cached pages */
>   	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
> +	/*
> +	 * try to use cached I/O if page allocation succeeds or fallback
> +	 * to in-place I/O instead to avoid any direct reclaim.
> +	 */
> +	TRYALLOC,
>   };
>   
>   /*
> @@ -154,13 +159,15 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
>   
>   static void preload_compressed_pages(struct z_erofs_collector *clt,
>   				     struct address_space *mc,
> -				     enum z_erofs_cache_alloctype type)
> +				     enum z_erofs_cache_alloctype type,
> +				     struct list_head *pagepool)
>   {
>   	const struct z_erofs_pcluster *pcl = clt->pcl;
>   	const unsigned int clusterpages = BIT(pcl->clusterbits);
>   	struct page **pages = clt->compressedpages;
>   	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
>   	bool standalone = true;
> +	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;

It could be made local, as there is only one place that uses it.

Reviewed-by: Chao Yu <yuchao0@huawei.com>

Thanks,

>   
>   	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
>   		return;
> @@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
>   		struct page *page;
>   		compressed_page_t t;
> +		struct page *newpage = NULL;
>   
>   		/* the compressed page was loaded before */
>   		if (READ_ONCE(*pages))
> @@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   			t = tag_compressed_page_justfound(page);
>   		} else if (type == DELAYEDALLOC) {
>   			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
> +		} else if (type == TRYALLOC) {
> +			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
> +
> +			newpage = erofs_allocpage(pagepool, gfp);
> +			if (!newpage)
> +				goto dontalloc;
> +
> +			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
> +			t = tag_compressed_page_justfound(newpage);
>   		} else {	/* DONTALLOC */
> +dontalloc:
>   			if (standalone)
>   				clt->compressedpages = pages;
>   			standalone = false;
> @@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
>   		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
>   			continue;
>   
> -		if (page)
> +		if (page) {
>   			put_page(page);
> +		} else if (newpage) {
> +			set_page_private(newpage, 0);
> +			list_add(&newpage->lru, pagepool);
> +		}
>   	}
>   
>   	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
> @@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
>   }
>   
>   static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> -				struct page *page)
> +				struct page *page, struct list_head *pagepool)
>   {
>   	struct inode *const inode = fe->inode;
>   	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
> @@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>   
>   	/* preload all compressed pages (maybe downgrade role if necessary) */
>   	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
> -		cache_strategy = DELAYEDALLOC;
> +		cache_strategy = TRYALLOC;
>   	else
>   		cache_strategy = DONTALLOC;
>   
> -	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
> +	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> +				 cache_strategy, pagepool);
>   
>   hitted:
>   	/*
> @@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>   	justfound = tagptr_unfold_tags(t);
>   	page = tagptr_unfold_ptr(t);
>   
> +	/*
> +	 * preallocated cached pages, which is used to avoid direct reclaim
> +	 * otherwise, it will go inplace I/O path instead.
> +	 */
> +	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
> +		WRITE_ONCE(pcl->compressed_pages[nr], page);
> +		set_page_private(page, 0);
> +		tocache = true;
> +		goto out_tocache;
> +	}
>   	mapping = READ_ONCE(page->mapping);
>   
>   	/*
> @@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>   		cond_resched();
>   		goto repeat;
>   	}
> -
> +out_tocache:
>   	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
>   		/* turn into temporary page if fails */
>   		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> @@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
>   
>   	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
>   
> -	err = z_erofs_do_read_page(&f, page);
> +	err = z_erofs_do_read_page(&f, page, &pagepool);
>   	(void)z_erofs_collector_end(&f.clt);
>   
>   	/* if some compressed cluster ready, need submit them anyway */
> @@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
>   		/* traversal in reverse order */
>   		head = (void *)page_private(page);
>   
> -		err = z_erofs_do_read_page(&f, page);
> +		err = z_erofs_do_read_page(&f, page, &pagepool);
>   		if (err)
>   			erofs_err(inode->i_sb,
>   				  "readahead error at page %lu @ nid %llu",
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] erofs: force inplace I/O under low memory scenario
  2020-12-09 10:07     ` Chao Yu
@ 2020-12-09 11:36       ` Gao Xiang
  -1 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2020-12-09 11:36 UTC (permalink / raw)
  To: Chao Yu; +Cc: Gao Xiang, linux-erofs, LKML, Chao Yu

Hi Chao,

On Wed, Dec 09, 2020 at 06:07:08PM +0800, Chao Yu wrote:
> On 2020/12/8 13:46, Gao Xiang wrote:

...

> >   	bool standalone = true;
> > +	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;
> 
> It could be made local, as there is only one place that uses it.

This line is somewhat too long, and I have no idea how to inline
it properly... I think I might leave it as-is, or find a better
way to fold it in without generating overly long lines...

Thanks,
Gao Xiang

> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> Thanks,
> 
> >   	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
> >   		return;
> > @@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
> >   		struct page *page;
> >   		compressed_page_t t;
> > +		struct page *newpage = NULL;
> >   		/* the compressed page was loaded before */
> >   		if (READ_ONCE(*pages))
> > @@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   			t = tag_compressed_page_justfound(page);
> >   		} else if (type == DELAYEDALLOC) {
> >   			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
> > +		} else if (type == TRYALLOC) {
> > +			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
> > +
> > +			newpage = erofs_allocpage(pagepool, gfp);
> > +			if (!newpage)
> > +				goto dontalloc;
> > +
> > +			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
> > +			t = tag_compressed_page_justfound(newpage);
> >   		} else {	/* DONTALLOC */
> > +dontalloc:
> >   			if (standalone)
> >   				clt->compressedpages = pages;
> >   			standalone = false;
> > @@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
> >   			continue;
> > -		if (page)
> > +		if (page) {
> >   			put_page(page);
> > +		} else if (newpage) {
> > +			set_page_private(newpage, 0);
> > +			list_add(&newpage->lru, pagepool);
> > +		}
> >   	}
> >   	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
> > @@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
> >   }
> >   static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > -				struct page *page)
> > +				struct page *page, struct list_head *pagepool)
> >   {
> >   	struct inode *const inode = fe->inode;
> >   	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
> > @@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> >   	/* preload all compressed pages (maybe downgrade role if necessary) */
> >   	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
> > -		cache_strategy = DELAYEDALLOC;
> > +		cache_strategy = TRYALLOC;
> >   	else
> >   		cache_strategy = DONTALLOC;
> > -	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
> > +	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > +				 cache_strategy, pagepool);
> >   hitted:
> >   	/*
> > @@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> >   	justfound = tagptr_unfold_tags(t);
> >   	page = tagptr_unfold_ptr(t);
> > +	/*
> > +	 * preallocated cached pages, which is used to avoid direct reclaim
> > +	 * otherwise, it will go inplace I/O path instead.
> > +	 */
> > +	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
> > +		WRITE_ONCE(pcl->compressed_pages[nr], page);
> > +		set_page_private(page, 0);
> > +		tocache = true;
> > +		goto out_tocache;
> > +	}
> >   	mapping = READ_ONCE(page->mapping);
> >   	/*
> > @@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> >   		cond_resched();
> >   		goto repeat;
> >   	}
> > -
> > +out_tocache:
> >   	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
> >   		/* turn into temporary page if fails */
> >   		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> > @@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
> >   	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
> > -	err = z_erofs_do_read_page(&f, page);
> > +	err = z_erofs_do_read_page(&f, page, &pagepool);
> >   	(void)z_erofs_collector_end(&f.clt);
> >   	/* if some compressed cluster ready, need submit them anyway */
> > @@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
> >   		/* traversal in reverse order */
> >   		head = (void *)page_private(page);
> > -		err = z_erofs_do_read_page(&f, page);
> > +		err = z_erofs_do_read_page(&f, page, &pagepool);
> >   		if (err)
> >   			erofs_err(inode->i_sb,
> >   				  "readahead error at page %lu @ nid %llu",
> > 
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] erofs: force inplace I/O under low memory scenario
@ 2020-12-09 11:36       ` Gao Xiang
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2020-12-09 11:36 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-erofs, LKML

Hi Chao,

On Wed, Dec 09, 2020 at 06:07:08PM +0800, Chao Yu wrote:
> On 2020/12/8 13:46, Gao Xiang wrote:

...

> >   	bool standalone = true;
> > +	gfp_t gfp = mapping_gfp_constraint(mc, GFP_KERNEL) & ~__GFP_DIRECT_RECLAIM;
> 
> Could be local as there is only one place uses it.

This line is somewhat too long, and I have no idea how to inline
it properly... I think I might either leave it as-is or find a
better way to fold it in without generating overly long
lines...

Thanks,
Gao Xiang

> 
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> 
> Thanks,
> 
> >   	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
> >   		return;
> > @@ -168,6 +175,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
> >   		struct page *page;
> >   		compressed_page_t t;
> > +		struct page *newpage = NULL;
> >   		/* the compressed page was loaded before */
> >   		if (READ_ONCE(*pages))
> > @@ -179,7 +187,17 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   			t = tag_compressed_page_justfound(page);
> >   		} else if (type == DELAYEDALLOC) {
> >   			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
> > +		} else if (type == TRYALLOC) {
> > +			gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
> > +
> > +			newpage = erofs_allocpage(pagepool, gfp);
> > +			if (!newpage)
> > +				goto dontalloc;
> > +
> > +			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
> > +			t = tag_compressed_page_justfound(newpage);
> >   		} else {	/* DONTALLOC */
> > +dontalloc:
> >   			if (standalone)
> >   				clt->compressedpages = pages;
> >   			standalone = false;
> > @@ -189,8 +207,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
> >   		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
> >   			continue;
> > -		if (page)
> > +		if (page) {
> >   			put_page(page);
> > +		} else if (newpage) {
> > +			set_page_private(newpage, 0);
> > +			list_add(&newpage->lru, pagepool);
> > +		}
> >   	}
> >   	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
> > @@ -560,7 +582,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
> >   }
> >   static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > -				struct page *page)
> > +				struct page *page, struct list_head *pagepool)
> >   {
> >   	struct inode *const inode = fe->inode;
> >   	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
> > @@ -613,11 +635,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> >   	/* preload all compressed pages (maybe downgrade role if necessary) */
> >   	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
> > -		cache_strategy = DELAYEDALLOC;
> > +		cache_strategy = TRYALLOC;
> >   	else
> >   		cache_strategy = DONTALLOC;
> > -	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
> > +	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > +				 cache_strategy, pagepool);
> >   hitted:
> >   	/*
> > @@ -1011,6 +1034,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> >   	justfound = tagptr_unfold_tags(t);
> >   	page = tagptr_unfold_ptr(t);
> > +	/*
> > +	 * preallocated cached pages, which is used to avoid direct reclaim
> > +	 * otherwise, it will go inplace I/O path instead.
> > +	 */
> > +	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
> > +		WRITE_ONCE(pcl->compressed_pages[nr], page);
> > +		set_page_private(page, 0);
> > +		tocache = true;
> > +		goto out_tocache;
> > +	}
> >   	mapping = READ_ONCE(page->mapping);
> >   	/*
> > @@ -1073,7 +1106,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> >   		cond_resched();
> >   		goto repeat;
> >   	}
> > -
> > +out_tocache:
> >   	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
> >   		/* turn into temporary page if fails */
> >   		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> > @@ -1282,7 +1315,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
> >   	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
> > -	err = z_erofs_do_read_page(&f, page);
> > +	err = z_erofs_do_read_page(&f, page, &pagepool);
> >   	(void)z_erofs_collector_end(&f.clt);
> >   	/* if some compressed cluster ready, need submit them anyway */
> > @@ -1336,7 +1369,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
> >   		/* traversal in reverse order */
> >   		head = (void *)page_private(page);
> > -		err = z_erofs_do_read_page(&f, page);
> > +		err = z_erofs_do_read_page(&f, page, &pagepool);
> >   		if (err)
> >   			erofs_err(inode->i_sb,
> >   				  "readahead error at page %lu @ nid %llu",
> > 
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] erofs: force inplace I/O under low memory scenario
  2020-12-08  5:46   ` Gao Xiang via Linux-erofs
@ 2020-12-09 12:37     ` Gao Xiang via Linux-erofs
  -1 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang @ 2020-12-09 12:37 UTC (permalink / raw)
  To: linux-erofs, Chao Yu; +Cc: Chao Yu, LKML, Gao Xiang

From: Gao Xiang <hsiangkao@redhat.com>

Try to forcibly switch to inplace I/O under low-memory scenarios in
order to avoid direct memory reclaim due to cached page allocation.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
v2:
 refine the gfp definition.

 fs/erofs/compress.h |  3 +++
 fs/erofs/zdata.c    | 48 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 4dadde18cdf1..aea129ddda74 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -28,11 +28,13 @@ struct z_erofs_decompress_req {
 
 /* some special page->private (unsigned long, see below) */
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
+#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
 
 /*
  * For all pages in a pcluster, page->private should be one of
  * Type                         Last 2bits      page->private
  * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
+ * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
  * cached/managed page          00              pointer to z_erofs_pcluster
  * online page (file-backed,    01/10/11        sub-index << 2 | count
  *              some pages can be used for inplace I/O)
@@ -40,6 +42,7 @@ struct z_erofs_decompress_req {
  * page->mapping should be one of
  * Type                 page->mapping
  * short-lived page     NULL
+ * preallocated page    NULL
  * cached/managed page  non-NULL or NULL (invalidated/truncated page)
  * online page          non-NULL
  *
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 777790038bc9..6cb356c4217b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -20,6 +20,11 @@
 enum z_erofs_cache_alloctype {
 	DONTALLOC,	/* don't allocate any cached pages */
 	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
+	/*
+	 * try to use cached I/O if page allocation succeeds, or fall back
+	 * to in-place I/O instead to avoid any direct reclaim.
+	 */
+	TRYALLOC,
 };
 
 /*
@@ -154,13 +159,16 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 
 static void preload_compressed_pages(struct z_erofs_collector *clt,
 				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type)
+				     enum z_erofs_cache_alloctype type,
+				     struct list_head *pagepool)
 {
 	const struct z_erofs_pcluster *pcl = clt->pcl;
 	const unsigned int clusterpages = BIT(pcl->clusterbits);
 	struct page **pages = clt->compressedpages;
 	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
 	bool standalone = true;
+	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
+			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
 		return;
@@ -168,6 +176,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
 		struct page *page;
 		compressed_page_t t;
+		struct page *newpage = NULL;
 
 		/* the compressed page was loaded before */
 		if (READ_ONCE(*pages))
@@ -179,7 +188,15 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 			t = tag_compressed_page_justfound(page);
 		} else if (type == DELAYEDALLOC) {
 			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+		} else if (type == TRYALLOC) {
+			newpage = erofs_allocpage(pagepool, gfp);
+			if (!newpage)
+				goto dontalloc;
+
+			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+			t = tag_compressed_page_justfound(newpage);
 		} else {	/* DONTALLOC */
+dontalloc:
 			if (standalone)
 				clt->compressedpages = pages;
 			standalone = false;
@@ -189,8 +206,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
 			continue;
 
-		if (page)
+		if (page) {
 			put_page(page);
+		} else if (newpage) {
+			set_page_private(newpage, 0);
+			list_add(&newpage->lru, pagepool);
+		}
 	}
 
 	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -554,7 +575,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-				struct page *page)
+				struct page *page, struct list_head *pagepool)
 {
 	struct inode *const inode = fe->inode;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -607,11 +628,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 
 	/* preload all compressed pages (maybe downgrade role if necessary) */
 	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
-		cache_strategy = DELAYEDALLOC;
+		cache_strategy = TRYALLOC;
 	else
 		cache_strategy = DONTALLOC;
 
-	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
+	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+				 cache_strategy, pagepool);
 
 hitted:
 	/*
@@ -1005,6 +1027,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 	justfound = tagptr_unfold_tags(t);
 	page = tagptr_unfold_ptr(t);
 
+	/*
+	 * preallocated cached pages, which are used to avoid direct reclaim;
+	 * otherwise, it will go the inplace I/O path instead.
+	 */
+	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
+		WRITE_ONCE(pcl->compressed_pages[nr], page);
+		set_page_private(page, 0);
+		tocache = true;
+		goto out_tocache;
+	}
 	mapping = READ_ONCE(page->mapping);
 
 	/*
@@ -1067,7 +1099,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 		cond_resched();
 		goto repeat;
 	}
-
+out_tocache:
 	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
 		/* turn into temporary page if fails (1 ref) */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
@@ -1278,7 +1310,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
 
 	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
-	err = z_erofs_do_read_page(&f, page);
+	err = z_erofs_do_read_page(&f, page, &pagepool);
 	(void)z_erofs_collector_end(&f.clt);
 
 	/* if some compressed cluster ready, need submit them anyway */
@@ -1332,7 +1364,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		/* traversal in reverse order */
 		head = (void *)page_private(page);
 
-		err = z_erofs_do_read_page(&f, page);
+		err = z_erofs_do_read_page(&f, page, &pagepool);
 		if (err)
 			erofs_err(inode->i_sb,
 				  "readahead error at page %lu @ nid %llu",
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2] erofs: force inplace I/O under low memory scenario
@ 2020-12-09 12:37     ` Gao Xiang via Linux-erofs
  0 siblings, 0 replies; 8+ messages in thread
From: Gao Xiang via Linux-erofs @ 2020-12-09 12:37 UTC (permalink / raw)
  To: linux-erofs, Chao Yu; +Cc: LKML

From: Gao Xiang <hsiangkao@redhat.com>

Try to forcibly switch to inplace I/O under low-memory scenarios in
order to avoid direct memory reclaim due to cached page allocation.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
v2:
 refine the gfp definition.

 fs/erofs/compress.h |  3 +++
 fs/erofs/zdata.c    | 48 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 4dadde18cdf1..aea129ddda74 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -28,11 +28,13 @@ struct z_erofs_decompress_req {
 
 /* some special page->private (unsigned long, see below) */
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
+#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
 
 /*
  * For all pages in a pcluster, page->private should be one of
  * Type                         Last 2bits      page->private
  * short-lived page             00              Z_EROFS_SHORTLIVED_PAGE
+ * preallocated page (tryalloc) 00              Z_EROFS_PREALLOCATED_PAGE
  * cached/managed page          00              pointer to z_erofs_pcluster
  * online page (file-backed,    01/10/11        sub-index << 2 | count
  *              some pages can be used for inplace I/O)
@@ -40,6 +42,7 @@ struct z_erofs_decompress_req {
  * page->mapping should be one of
  * Type                 page->mapping
  * short-lived page     NULL
+ * preallocated page    NULL
  * cached/managed page  non-NULL or NULL (invalidated/truncated page)
  * online page          non-NULL
  *
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 777790038bc9..6cb356c4217b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -20,6 +20,11 @@
 enum z_erofs_cache_alloctype {
 	DONTALLOC,	/* don't allocate any cached pages */
 	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
+	/*
+	 * try to use cached I/O if page allocation succeeds, or fall back
+	 * to in-place I/O instead to avoid any direct reclaim.
+	 */
+	TRYALLOC,
 };
 
 /*
@@ -154,13 +159,16 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
 
 static void preload_compressed_pages(struct z_erofs_collector *clt,
 				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type)
+				     enum z_erofs_cache_alloctype type,
+				     struct list_head *pagepool)
 {
 	const struct z_erofs_pcluster *pcl = clt->pcl;
 	const unsigned int clusterpages = BIT(pcl->clusterbits);
 	struct page **pages = clt->compressedpages;
 	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
 	bool standalone = true;
+	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
+			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
 		return;
@@ -168,6 +176,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
 		struct page *page;
 		compressed_page_t t;
+		struct page *newpage = NULL;
 
 		/* the compressed page was loaded before */
 		if (READ_ONCE(*pages))
@@ -179,7 +188,15 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 			t = tag_compressed_page_justfound(page);
 		} else if (type == DELAYEDALLOC) {
 			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+		} else if (type == TRYALLOC) {
+			newpage = erofs_allocpage(pagepool, gfp);
+			if (!newpage)
+				goto dontalloc;
+
+			set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+			t = tag_compressed_page_justfound(newpage);
 		} else {	/* DONTALLOC */
+dontalloc:
 			if (standalone)
 				clt->compressedpages = pages;
 			standalone = false;
@@ -189,8 +206,12 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
 		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
 			continue;
 
-		if (page)
+		if (page) {
 			put_page(page);
+		} else if (newpage) {
+			set_page_private(newpage, 0);
+			list_add(&newpage->lru, pagepool);
+		}
 	}
 
 	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
@@ -554,7 +575,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 }
 
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-				struct page *page)
+				struct page *page, struct list_head *pagepool)
 {
 	struct inode *const inode = fe->inode;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -607,11 +628,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 
 	/* preload all compressed pages (maybe downgrade role if necessary) */
 	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
-		cache_strategy = DELAYEDALLOC;
+		cache_strategy = TRYALLOC;
 	else
 		cache_strategy = DONTALLOC;
 
-	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
+	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+				 cache_strategy, pagepool);
 
 hitted:
 	/*
@@ -1005,6 +1027,16 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 	justfound = tagptr_unfold_tags(t);
 	page = tagptr_unfold_ptr(t);
 
+	/*
+	 * preallocated cached pages, which are used to avoid direct reclaim;
+	 * otherwise, it will go the inplace I/O path instead.
+	 */
+	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
+		WRITE_ONCE(pcl->compressed_pages[nr], page);
+		set_page_private(page, 0);
+		tocache = true;
+		goto out_tocache;
+	}
 	mapping = READ_ONCE(page->mapping);
 
 	/*
@@ -1067,7 +1099,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 		cond_resched();
 		goto repeat;
 	}
-
+out_tocache:
 	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
 		/* turn into temporary page if fails (1 ref) */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
@@ -1278,7 +1310,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
 
 	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
-	err = z_erofs_do_read_page(&f, page);
+	err = z_erofs_do_read_page(&f, page, &pagepool);
 	(void)z_erofs_collector_end(&f.clt);
 
 	/* if some compressed cluster ready, need submit them anyway */
@@ -1332,7 +1364,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		/* traversal in reverse order */
 		head = (void *)page_private(page);
 
-		err = z_erofs_do_read_page(&f, page);
+		err = z_erofs_do_read_page(&f, page, &pagepool);
 		if (err)
 			erofs_err(inode->i_sb,
 				  "readahead error at page %lu @ nid %llu",
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-12-09 12:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20201208054600.16302-1-hsiangkao.ref@aol.com>
2020-12-08  5:46 ` [PATCH] erofs: force inplace I/O under low memory scenario Gao Xiang
2020-12-08  5:46   ` Gao Xiang via Linux-erofs
2020-12-09 10:07   ` Chao Yu
2020-12-09 10:07     ` Chao Yu
2020-12-09 11:36     ` Gao Xiang
2020-12-09 11:36       ` Gao Xiang
2020-12-09 12:37   ` [PATCH v2] " Gao Xiang
2020-12-09 12:37     ` Gao Xiang via Linux-erofs

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.