All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-24 17:09 ` Konstantin Khlebnikov
  0 siblings, 0 replies; 12+ messages in thread
From: Konstantin Khlebnikov @ 2016-02-24 17:09 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4

This might be unexpected but pages allocated for sbi->s_buddy_cache are
charged to current memory cgroup. So, GFP_NOFS allocation could fail if
current task has been killed by OOM or if current memory cgroup has no
free memory left. Block allocator cannot handle such failures here yet.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 fs/ext4/mballoc.c |   47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4424b7bf8ac6..8b7e573eaf97 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
  * for this page; do not hold this lock when calling this routine!
  */
 
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
 {
 	ext4_group_t ngroups;
 	int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	/* allocate buffer_heads to read bitmaps */
 	if (groups_per_page > 1) {
 		i = sizeof(struct buffer_head *) * groups_per_page;
-		bh = kzalloc(i, GFP_NOFS);
+		bh = kzalloc(i, gfp);
 		if (bh == NULL) {
 			err = -ENOMEM;
 			goto out;
@@ -983,7 +983,7 @@ out:
  * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
  */
 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-		ext4_group_t group, struct ext4_buddy *e4b)
+		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
 {
 	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
 	int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 	block = group * 2;
 	pnum = block / blocks_per_page;
 	poff = block % blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 
 	block++;
 	pnum = block / blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
  * calling this routine!
  */
 static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
 {
 
 	struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	 * The call to ext4_mb_get_buddy_page_lock will mark the
 	 * page accessed.
 	 */
-	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
 		/*
 		 * somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 
 	page = e4b.bd_bitmap_page;
-	ret = ext4_mb_init_cache(page, NULL);
+	ret = ext4_mb_init_cache(page, NULL, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 	/* init buddy cache */
 	page = e4b.bd_buddy_page;
-	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
  * calling this routine!
  */
 static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-					struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+		       struct ext4_buddy *e4b, gfp_t gfp)
 {
 	int blocks_per_page;
 	int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 * we need full data about the group
 		 * to make a good selection
 		 */
-		ret = ext4_mb_init_group(sb, group);
+		ret = ext4_mb_init_group(sb, group, gfp);
 		if (ret)
 			return ret;
 	}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 			 * wait for it to initialize.
 			 */
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, NULL);
+				ret = ext4_mb_init_cache(page, NULL, gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+							 gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1247,6 +1248,12 @@ err:
 	return ret;
 }
 
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+			      struct ext4_buddy *e4b)
+{
+	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
 	if (e4b->bd_bitmap_page)
@@ -2045,7 +2052,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-		int ret = ext4_mb_init_group(ac->ac_sb, group);
+		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -4815,7 +4822,9 @@ do_more:
 #endif
 	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+				     GFP_NOFS|__GFP_NOFAIL);
 	if (err)
 		goto error_return;
 
@@ -5217,7 +5226,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		grp = ext4_get_group_info(sb, group);
 		/* We only do this if the grp has never been initialized */
 		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-			ret = ext4_mb_init_group(sb, group);
+			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
 			if (ret)
 				break;
 		}

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-24 17:09 ` Konstantin Khlebnikov
  0 siblings, 0 replies; 12+ messages in thread
From: Konstantin Khlebnikov @ 2016-02-24 17:09 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4

This might be unexpected but pages allocated for sbi->s_buddy_cache are
charged to current memory cgroup. So, GFP_NOFS allocation could fail if
current task has been killed by OOM or if current memory cgroup has no
free memory left. Block allocator cannot handle such failures here yet.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 fs/ext4/mballoc.c |   47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4424b7bf8ac6..8b7e573eaf97 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
  * for this page; do not hold this lock when calling this routine!
  */
 
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
 {
 	ext4_group_t ngroups;
 	int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	/* allocate buffer_heads to read bitmaps */
 	if (groups_per_page > 1) {
 		i = sizeof(struct buffer_head *) * groups_per_page;
-		bh = kzalloc(i, GFP_NOFS);
+		bh = kzalloc(i, gfp);
 		if (bh == NULL) {
 			err = -ENOMEM;
 			goto out;
@@ -983,7 +983,7 @@ out:
  * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
  */
 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-		ext4_group_t group, struct ext4_buddy *e4b)
+		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
 {
 	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
 	int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 	block = group * 2;
 	pnum = block / blocks_per_page;
 	poff = block % blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 
 	block++;
 	pnum = block / blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
  * calling this routine!
  */
 static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
 {
 
 	struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	 * The call to ext4_mb_get_buddy_page_lock will mark the
 	 * page accessed.
 	 */
-	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
 		/*
 		 * somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 
 	page = e4b.bd_bitmap_page;
-	ret = ext4_mb_init_cache(page, NULL);
+	ret = ext4_mb_init_cache(page, NULL, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 	/* init buddy cache */
 	page = e4b.bd_buddy_page;
-	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
  * calling this routine!
  */
 static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-					struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+		       struct ext4_buddy *e4b, gfp_t gfp)
 {
 	int blocks_per_page;
 	int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 * we need full data about the group
 		 * to make a good selection
 		 */
-		ret = ext4_mb_init_group(sb, group);
+		ret = ext4_mb_init_group(sb, group, gfp);
 		if (ret)
 			return ret;
 	}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 			 * wait for it to initialize.
 			 */
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, NULL);
+				ret = ext4_mb_init_cache(page, NULL, gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+							 gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1247,6 +1248,12 @@ err:
 	return ret;
 }
 
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+			      struct ext4_buddy *e4b)
+{
+	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
 	if (e4b->bd_bitmap_page)
@@ -2045,7 +2052,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-		int ret = ext4_mb_init_group(ac->ac_sb, group);
+		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -4815,7 +4822,9 @@ do_more:
 #endif
 	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+				     GFP_NOFS|__GFP_NOFAIL);
 	if (err)
 		goto error_return;
 
@@ -5217,7 +5226,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		grp = ext4_get_group_info(sb, group);
 		/* We only do this if the grp has never been initialized */
 		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-			ret = ext4_mb_init_group(sb, group);
+			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
 			if (ret)
 				break;
 		}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
  2016-02-24 17:09 ` Konstantin Khlebnikov
@ 2016-02-25  9:01   ` Nikolay Borisov
  -1 siblings, 0 replies; 12+ messages in thread
From: Nikolay Borisov @ 2016-02-25  9:01 UTC (permalink / raw)
  To: Konstantin Khlebnikov, Theodore Ts'o
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4



On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> current task has been killed by OOM or if current memory cgroup has no
> free memory left. Block allocator cannot handle such failures here yet.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Adding new users of GFP_NOFAIL is deprecated. Where exactly does the
block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
seems to be handled at all call sites. There are some BUG_ONs but from
the comments there I guess they should occur when we try to find a page
and not allocate a new one?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-25  9:01   ` Nikolay Borisov
  0 siblings, 0 replies; 12+ messages in thread
From: Nikolay Borisov @ 2016-02-25  9:01 UTC (permalink / raw)
  To: Konstantin Khlebnikov, Theodore Ts'o
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4



On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> current task has been killed by OOM or if current memory cgroup has no
> free memory left. Block allocator cannot handle such failures here yet.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Adding new users of GFP_NOFAIL is deprecated. Where exactly does the
block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
seems to be handled at all call sites. There are some BUG_ONs but from
the comments there I guess they should occur when we try to find a page
and not allocate a new one?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
  2016-02-25  9:01   ` Nikolay Borisov
@ 2016-02-25  9:08     ` Michal Hocko
  -1 siblings, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2016-02-25  9:08 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4

On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
> 
> 
> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> > This might be unexpected but pages allocated for sbi->s_buddy_cache are
> > charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> > current task has been killed by OOM or if current memory cgroup has no
> > free memory left. Block allocator cannot handle such failures here yet.
> > 
> > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> Adding new users of GFP_NOFAIL is deprecated.

This is not true. GFP_NOFAIL should be used where the allocation failure
is no tolleratable and it is much more preferrable to doing an opencoded
endless loop over page allocator.

> Where exactly does the
> block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
> seems to be handled at all call sites. There are some BUG_ONs but from
> the comments there I guess they should occur when we try to find a page
> and not allocate a new one?

I have posted a similar patch last year:
http://lkml.kernel.org/r/1438768284-30927-6-git-send-email-mhocko@kernel.org
because I could see emergency reboots when GFP_NOFS allocations were
allowed to fail.

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-25  9:08     ` Michal Hocko
  0 siblings, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2016-02-25  9:08 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4

On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
> 
> 
> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> > This might be unexpected but pages allocated for sbi->s_buddy_cache are
> > charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> > current task has been killed by OOM or if current memory cgroup has no
> > free memory left. Block allocator cannot handle such failures here yet.
> > 
> > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> Adding new users of GFP_NOFAIL is deprecated.

This is not true. GFP_NOFAIL should be used where the allocation failure
is no tolleratable and it is much more preferrable to doing an opencoded
endless loop over page allocator.

> Where exactly does the
> block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
> seems to be handled at all call sites. There are some BUG_ONs but from
> the comments there I guess they should occur when we try to find a page
> and not allocate a new one?

I have posted a similar patch last year:
http://lkml.kernel.org/r/1438768284-30927-6-git-send-email-mhocko@kernel.org
because I could see emergency reboots when GFP_NOFS allocations were
allowed to fail.

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
  2016-02-25  9:08     ` Michal Hocko
@ 2016-02-25  9:12       ` Nikolay Borisov
  -1 siblings, 0 replies; 12+ messages in thread
From: Nikolay Borisov @ 2016-02-25  9:12 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4



On 02/25/2016 11:08 AM, Michal Hocko wrote:
> On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
>>
>>
>> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
>>> This might be unexpected but pages allocated for sbi->s_buddy_cache are
>>> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
>>> current task has been killed by OOM or if current memory cgroup has no
>>> free memory left. Block allocator cannot handle such failures here yet.
>>>
>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>>
>> Adding new users of GFP_NOFAIL is deprecated.
> 
> This is not true. GFP_NOFAIL should be used where the allocation failure
> is no tolleratable and it is much more preferrable to doing an opencoded
> endless loop over page allocator.

In that case the comments in buffered_rmqueue, and the WARN_ON in
__alloc_pages_may_oom and __alloc_pages_slowpath perhaps should be
removed since they are misleading?

> 
>> Where exactly does the
>> block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
>> seems to be handled at all call sites. There are some BUG_ONs but from
>> the comments there I guess they should occur when we try to find a page
>> and not allocate a new one?
> 
> I have posted a similar patch last year:
> http://lkml.kernel.org/r/1438768284-30927-6-git-send-email-mhocko@kernel.org
> because I could see emergency reboots when GFP_NOFS allocations were
> allowed to fail.
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-25  9:12       ` Nikolay Borisov
  0 siblings, 0 replies; 12+ messages in thread
From: Nikolay Borisov @ 2016-02-25  9:12 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4



On 02/25/2016 11:08 AM, Michal Hocko wrote:
> On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
>>
>>
>> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
>>> This might be unexpected but pages allocated for sbi->s_buddy_cache are
>>> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
>>> current task has been killed by OOM or if current memory cgroup has no
>>> free memory left. Block allocator cannot handle such failures here yet.
>>>
>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>>
>> Adding new users of GFP_NOFAIL is deprecated.
> 
> This is not true. GFP_NOFAIL should be used where the allocation failure
> is no tolleratable and it is much more preferrable to doing an opencoded
> endless loop over page allocator.

In that case the comments in buffered_rmqueue, and the WARN_ON in
__alloc_pages_may_oom and __alloc_pages_slowpath perhaps should be
removed since they are misleading?

> 
>> Where exactly does the
>> block allocator fail, I skimmed the code and failing ext4_mb_load_buddy
>> seems to be handled at all call sites. There are some BUG_ONs but from
>> the comments there I guess they should occur when we try to find a page
>> and not allocate a new one?
> 
> I have posted a similar patch last year:
> http://lkml.kernel.org/r/1438768284-30927-6-git-send-email-mhocko@kernel.org
> because I could see emergency reboots when GFP_NOFS allocations were
> allowed to fail.
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
  2016-02-25  9:12       ` Nikolay Borisov
@ 2016-02-25 10:27         ` Michal Hocko
  -1 siblings, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2016-02-25 10:27 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4

On Thu 25-02-16 11:12:08, Nikolay Borisov wrote:
> 
> 
> On 02/25/2016 11:08 AM, Michal Hocko wrote:
> > On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
> >>
> >>
> >> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> >>> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> >>> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> >>> current task has been killed by OOM or if current memory cgroup has no
> >>> free memory left. Block allocator cannot handle such failures here yet.
> >>>
> >>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> >>
> >> Adding new users of GFP_NOFAIL is deprecated.
> > 
> > This is not true. GFP_NOFAIL should be used where the allocation failure
> > is no tolleratable and it is much more preferrable to doing an opencoded
> > endless loop over page allocator.
> 
> In that case the comments in buffered_rmqueue,

yes, will post the patch. The warning for order > 1 is still valid.

> and the WARN_ON in
> __alloc_pages_may_oom and __alloc_pages_slowpath perhaps should be
> removed since they are misleading?

We are only warning about absurd cases where __GFP_NOFAIL doesn't make
any sense.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-02-25 10:27         ` Michal Hocko
  0 siblings, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2016-02-25 10:27 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Konstantin Khlebnikov, Theodore Ts'o, linux-kernel, linux-mm,
	Dmitry Monakhov, Johannes Weiner, linux-fsdevel, linux-ext4

On Thu 25-02-16 11:12:08, Nikolay Borisov wrote:
> 
> 
> On 02/25/2016 11:08 AM, Michal Hocko wrote:
> > On Thu 25-02-16 11:01:32, Nikolay Borisov wrote:
> >>
> >>
> >> On 02/24/2016 07:09 PM, Konstantin Khlebnikov wrote:
> >>> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> >>> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> >>> current task has been killed by OOM or if current memory cgroup has no
> >>> free memory left. Block allocator cannot handle such failures here yet.
> >>>
> >>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> >>
> >> Adding new users of GFP_NOFAIL is deprecated.
> > 
> > This is not true. GFP_NOFAIL should be used where the allocation failure
> > is no tolleratable and it is much more preferrable to doing an opencoded
> > endless loop over page allocator.
> 
> In that case the comments in buffered_rmqueue,

yes, will post the patch. The warning for order > 1 is still valid.

> and the WARN_ON in
> __alloc_pages_may_oom and __alloc_pages_slowpath perhaps should be
> removed since they are misleading?

We are only warning about absurd cases where __GFP_NOFAIL doesn't make
any sense.
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
  2016-02-24 17:09 ` Konstantin Khlebnikov
@ 2016-03-13 21:30   ` Theodore Ts'o
  -1 siblings, 0 replies; 12+ messages in thread
From: Theodore Ts'o @ 2016-03-13 21:30 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4

On Wed, Feb 24, 2016 at 08:09:12PM +0300, Konstantin Khlebnikov wrote:
> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> current task has been killed by OOM or if current memory cgroup has no
> free memory left. Block allocator cannot handle such failures here yet.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Thanks, applied.

					- Ted

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks()
@ 2016-03-13 21:30   ` Theodore Ts'o
  0 siblings, 0 replies; 12+ messages in thread
From: Theodore Ts'o @ 2016-03-13 21:30 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Michal Hocko, linux-kernel, linux-mm, Dmitry Monakhov,
	Johannes Weiner, linux-fsdevel, linux-ext4

On Wed, Feb 24, 2016 at 08:09:12PM +0300, Konstantin Khlebnikov wrote:
> This might be unexpected but pages allocated for sbi->s_buddy_cache are
> charged to current memory cgroup. So, GFP_NOFS allocation could fail if
> current task has been killed by OOM or if current memory cgroup has no
> free memory left. Block allocator cannot handle such failures here yet.
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Thanks, applied.

					- Ted

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-03-13 21:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-24 17:09 [PATCH RFC] ext4: use __GFP_NOFAIL in ext4_free_blocks() Konstantin Khlebnikov
2016-02-24 17:09 ` Konstantin Khlebnikov
2016-02-25  9:01 ` Nikolay Borisov
2016-02-25  9:01   ` Nikolay Borisov
2016-02-25  9:08   ` Michal Hocko
2016-02-25  9:08     ` Michal Hocko
2016-02-25  9:12     ` Nikolay Borisov
2016-02-25  9:12       ` Nikolay Borisov
2016-02-25 10:27       ` Michal Hocko
2016-02-25 10:27         ` Michal Hocko
2016-03-13 21:30 ` Theodore Ts'o
2016-03-13 21:30   ` Theodore Ts'o

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.