linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
@ 2020-03-24 10:31 Hui Zhu
  2020-03-24 11:00 ` Michal Hocko
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Hui Zhu @ 2020-03-24 10:31 UTC (permalink / raw)
  To: hannes, mhocko, vdavydov.dev, akpm, hughd, yang.shi, kirill,
	dan.j.williams, aneesh.kumar, sean.j.christopherson, thellstrom,
	guro, shakeelb, chris, tj, tglx, linux-kernel, cgroups, linux-mm
  Cc: Hui Zhu, Hui Zhu

/sys/kernel/mm/transparent_hugepage/enabled is the only interface to
control if the application can use THP in system level.
Sometime, we would not want an application use THP even if
transparent_hugepage/enabled is set to "always" or "madvise" because
thp may need more cpu and memory resources in some cases.

This commit add a new interface memory.transparent_hugepage_disabled
in memcg.
When it set to 1, the application inside the cgroup cannot use THP
except dax.

Signed-off-by: Hui Zhu <teawaterz@linux.alibaba.com>
---
 include/linux/huge_mm.h    | 18 ++++++++++++++++--
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c            | 42 ++++++++++++++++++++++++++++++++++++++++++
 mm/shmem.c                 |  4 ++++
 4 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5aca3d1..fd81479 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -91,6 +91,16 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 extern unsigned long transparent_hugepage_flags;
 
+#ifdef CONFIG_MEMCG
+extern bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma);
+#else
+static inline bool
+memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
+{
+	return false;
+}
+#endif
+
 /*
  * to be used on vmas which are known to support THP.
  * Use transparent_hugepage_enabled otherwise
@@ -106,8 +116,6 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 
-	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
-		return true;
 	/*
 	 * For dax vmas, try to always use hugepage mappings. If the kernel does
 	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
@@ -117,6 +125,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 	if (vma_is_dax(vma))
 		return true;
 
+	if (memcg_transparent_hugepage_disabled(vma))
+		return false;
+
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+		return true;
+
 	if (transparent_hugepage_flags &
 				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
 		return !!(vma->vm_flags & VM_HUGEPAGE);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a7a0a1a5..abc3142 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -320,6 +320,8 @@ struct mem_cgroup {
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	struct deferred_split deferred_split_queue;
+
+	bool transparent_hugepage_disabled;
 #endif
 
 	struct mem_cgroup_per_node *nodeinfo[0];
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7a4bd8b..b6d91b6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5011,6 +5011,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent) {
 		memcg->swappiness = mem_cgroup_swappiness(parent);
 		memcg->oom_kill_disable = parent->oom_kill_disable;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		memcg->transparent_hugepage_disabled
+			= parent->transparent_hugepage_disabled;
+#endif
+	} else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		memcg->transparent_hugepage_disabled = false;
+#endif
 	}
 	if (parent && parent->use_hierarchy) {
 		memcg->use_hierarchy = true;
@@ -6126,6 +6134,24 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+static u64 transparent_hugepage_disabled_read(struct cgroup_subsys_state *css,
+					      struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return memcg->transparent_hugepage_disabled;
+}
+
+static int transparent_hugepage_disabled_write(struct cgroup_subsys_state *css,
+					       struct cftype *cft, u64 val)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	memcg->transparent_hugepage_disabled = !!val;
+
+	return 0;
+}
+
 static struct cftype memory_files[] = {
 	{
 		.name = "current",
@@ -6179,6 +6205,12 @@ static struct cftype memory_files[] = {
 		.seq_show = memory_oom_group_show,
 		.write = memory_oom_group_write,
 	},
+	{
+		.name = "transparent_hugepage_disabled",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = transparent_hugepage_disabled_read,
+		.write_u64 = transparent_hugepage_disabled_write,
+	},
 	{ }	/* terminate */
 };
 
@@ -6787,6 +6819,16 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 	refill_stock(memcg, nr_pages);
 }
 
+bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
+
+	if (memcg && memcg->transparent_hugepage_disabled)
+		return true;
+
+	return false;
+}
+
 static int __init cgroup_memory(char *s)
 {
 	char *token;
diff --git a/mm/shmem.c b/mm/shmem.c
index aad3ba7..253b63b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1810,6 +1810,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		goto alloc_nohuge;
 	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
 		goto alloc_nohuge;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (memcg_transparent_hugepage_disabled(vma))
+		goto alloc_nohuge;
+#endif
 	if (shmem_huge == SHMEM_HUGE_FORCE)
 		goto alloc_huge;
 	switch (sbinfo->huge) {
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 10:31 [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled Hui Zhu
@ 2020-03-24 11:00 ` Michal Hocko
  2020-03-24 12:30   ` teawater
  2020-03-24 12:17 ` Kirill A. Shutemov
  2020-03-24 13:23 ` Chris Down
  2 siblings, 1 reply; 7+ messages in thread
From: Michal Hocko @ 2020-03-24 11:00 UTC (permalink / raw)
  To: Hui Zhu
  Cc: hannes, vdavydov.dev, akpm, hughd, yang.shi, kirill,
	dan.j.williams, aneesh.kumar, sean.j.christopherson, thellstrom,
	guro, shakeelb, chris, tj, tglx, linux-kernel, cgroups, linux-mm,
	Hui Zhu

On Tue 24-03-20 18:31:56, Hui Zhu wrote:
> /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
> control if the application can use THP in system level.
> Sometime, we would not want an application use THP even if
> transparent_hugepage/enabled is set to "always" or "madvise" because
> thp may need more cpu and memory resources in some cases.

Could you specify that sometime by a real usecase in the memcg context
please?

> This commit add a new interface memory.transparent_hugepage_disabled
> in memcg.
> When it set to 1, the application inside the cgroup cannot use THP
> except dax.

Why should this interface differ from the global semantic. How does it
relate to the kcompactd. Your patch also doesn't seem to define this
knob to have hierarchical semantic. Why?

All that being said, this patch is lacking both proper justification and
the semantic is dubious to be honest. I have also say that I am not a
great fan. THP semantic is overly complex already and adding more on top
would require really strong usecase.

> Signed-off-by: Hui Zhu <teawaterz@linux.alibaba.com>
> ---
>  include/linux/huge_mm.h    | 18 ++++++++++++++++--
>  include/linux/memcontrol.h |  2 ++
>  mm/memcontrol.c            | 42 ++++++++++++++++++++++++++++++++++++++++++
>  mm/shmem.c                 |  4 ++++
>  4 files changed, 64 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 5aca3d1..fd81479 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -91,6 +91,16 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
>  
>  extern unsigned long transparent_hugepage_flags;
>  
> +#ifdef CONFIG_MEMCG
> +extern bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma);
> +#else
> +static inline bool
> +memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
> +{
> +	return false;
> +}
> +#endif
> +
>  /*
>   * to be used on vmas which are known to support THP.
>   * Use transparent_hugepage_enabled otherwise
> @@ -106,8 +116,6 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>  	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>  		return false;
>  
> -	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
> -		return true;
>  	/*
>  	 * For dax vmas, try to always use hugepage mappings. If the kernel does
>  	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
> @@ -117,6 +125,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>  	if (vma_is_dax(vma))
>  		return true;
>  
> +	if (memcg_transparent_hugepage_disabled(vma))
> +		return false;
> +
> +	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
> +		return true;
> +
>  	if (transparent_hugepage_flags &
>  				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
>  		return !!(vma->vm_flags & VM_HUGEPAGE);
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index a7a0a1a5..abc3142 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -320,6 +320,8 @@ struct mem_cgroup {
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  	struct deferred_split deferred_split_queue;
> +
> +	bool transparent_hugepage_disabled;
>  #endif
>  
>  	struct mem_cgroup_per_node *nodeinfo[0];
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7a4bd8b..b6d91b6 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5011,6 +5011,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>  	if (parent) {
>  		memcg->swappiness = mem_cgroup_swappiness(parent);
>  		memcg->oom_kill_disable = parent->oom_kill_disable;
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +		memcg->transparent_hugepage_disabled
> +			= parent->transparent_hugepage_disabled;
> +#endif
> +	} else {
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +		memcg->transparent_hugepage_disabled = false;
> +#endif
>  	}
>  	if (parent && parent->use_hierarchy) {
>  		memcg->use_hierarchy = true;
> @@ -6126,6 +6134,24 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
>  	return nbytes;
>  }
>  
> +static u64 transparent_hugepage_disabled_read(struct cgroup_subsys_state *css,
> +					      struct cftype *cft)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> +
> +	return memcg->transparent_hugepage_disabled;
> +}
> +
> +static int transparent_hugepage_disabled_write(struct cgroup_subsys_state *css,
> +					       struct cftype *cft, u64 val)
> +{
> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> +
> +	memcg->transparent_hugepage_disabled = !!val;
> +
> +	return 0;
> +}
> +
>  static struct cftype memory_files[] = {
>  	{
>  		.name = "current",
> @@ -6179,6 +6205,12 @@ static struct cftype memory_files[] = {
>  		.seq_show = memory_oom_group_show,
>  		.write = memory_oom_group_write,
>  	},
> +	{
> +		.name = "transparent_hugepage_disabled",
> +		.flags = CFTYPE_NOT_ON_ROOT,
> +		.read_u64 = transparent_hugepage_disabled_read,
> +		.write_u64 = transparent_hugepage_disabled_write,
> +	},
>  	{ }	/* terminate */
>  };
>  
> @@ -6787,6 +6819,16 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>  	refill_stock(memcg, nr_pages);
>  }
>  
> +bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
> +{
> +	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
> +
> +	if (memcg && memcg->transparent_hugepage_disabled)
> +		return true;
> +
> +	return false;
> +}
> +
>  static int __init cgroup_memory(char *s)
>  {
>  	char *token;
> diff --git a/mm/shmem.c b/mm/shmem.c
> index aad3ba7..253b63b 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1810,6 +1810,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>  		goto alloc_nohuge;
>  	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
>  		goto alloc_nohuge;
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	if (memcg_transparent_hugepage_disabled(vma))
> +		goto alloc_nohuge;
> +#endif
>  	if (shmem_huge == SHMEM_HUGE_FORCE)
>  		goto alloc_huge;
>  	switch (sbinfo->huge) {
> -- 
> 2.7.4

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 10:31 [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled Hui Zhu
  2020-03-24 11:00 ` Michal Hocko
@ 2020-03-24 12:17 ` Kirill A. Shutemov
  2020-03-24 19:17   ` David Rientjes
  2020-03-24 13:23 ` Chris Down
  2 siblings, 1 reply; 7+ messages in thread
From: Kirill A. Shutemov @ 2020-03-24 12:17 UTC (permalink / raw)
  To: Hui Zhu
  Cc: hannes, mhocko, vdavydov.dev, akpm, hughd, yang.shi,
	dan.j.williams, aneesh.kumar, sean.j.christopherson, thellstrom,
	guro, shakeelb, chris, tj, tglx, linux-kernel, cgroups, linux-mm,
	Hui Zhu

On Tue, Mar 24, 2020 at 06:31:56PM +0800, Hui Zhu wrote:
> /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
> control if the application can use THP in system level.

No. We have prctl(PR_SET_THP_DISABLE) too.

-- 
 Kirill A. Shutemov


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 11:00 ` Michal Hocko
@ 2020-03-24 12:30   ` teawater
  2020-03-24 13:17     ` Michal Hocko
  0 siblings, 1 reply; 7+ messages in thread
From: teawater @ 2020-03-24 12:30 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Hui Zhu, Johannes Weiner, Vladimir Davydov, Andrew Morton, hughd,
	yang.shi, kirill, dan.j.williams, aneesh.kumar,
	sean.j.christopherson, thellstrom, guro, shakeelb, chris, tj,
	tglx, linux-kernel, cgroups, linux-mm



> 2020年3月24日 19:00,Michal Hocko <mhocko@kernel.org> 写道:
> 
> On Tue 24-03-20 18:31:56, Hui Zhu wrote:
>> /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
>> control if the application can use THP in system level.
>> Sometime, we would not want an application use THP even if
>> transparent_hugepage/enabled is set to "always" or "madvise" because
>> thp may need more cpu and memory resources in some cases.
> 
> Could you specify that sometime by a real usecase in the memcg context
> please?


Thanks for your review.

We use thp+balloon to supply more memory flexibility for vm.
https://lore.kernel.org/linux-mm/1584893097-12317-1-git-send-email-teawater@gmail.com/
This is another thread that I am working around thp+balloon.

Other applications are already deployed on these machines.  The transparent_hugepage/enabled is set to never because they used to have a lot of THP related performance issues.
And some of them may call madvise thp with itself.

Then even if I set transparent_hugepage/enabled to madvise.  These programs are still at risk of THP-related performance issues.  That is why I need this cgroup thp switch.

> 
>> This commit add a new interface memory.transparent_hugepage_disabled
>> in memcg.
>> When it set to 1, the application inside the cgroup cannot use THP
>> except dax.
> 
> Why should this interface differ from the global semantic. How does it
> relate to the kcompactd. Your patch also doesn't seem to define this
> knob to have hierarchical semantic. Why?
> 

According to my previous description, I didn’t get any need to add transparent_hugepage/enabled like interface.
That is why just add a switch.

What about add a transparent_hugepage/enabled like interface?

Thanks,
Hui

> All that being said, this patch is lacking both proper justification and
> the semantic is dubious to be honest. I have also say that I am not a
> great fan. THP semantic is overly complex already and adding more on top
> would require really strong usecase.
> 
>> Signed-off-by: Hui Zhu <teawaterz@linux.alibaba.com>
>> ---
>> include/linux/huge_mm.h    | 18 ++++++++++++++++--
>> include/linux/memcontrol.h |  2 ++
>> mm/memcontrol.c            | 42 ++++++++++++++++++++++++++++++++++++++++++
>> mm/shmem.c                 |  4 ++++
>> 4 files changed, 64 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 5aca3d1..fd81479 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -91,6 +91,16 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
>> 
>> extern unsigned long transparent_hugepage_flags;
>> 
>> +#ifdef CONFIG_MEMCG
>> +extern bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma);
>> +#else
>> +static inline bool
>> +memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
>> +{
>> +	return false;
>> +}
>> +#endif
>> +
>> /*
>>  * to be used on vmas which are known to support THP.
>>  * Use transparent_hugepage_enabled otherwise
>> @@ -106,8 +116,6 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>> 	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>> 		return false;
>> 
>> -	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
>> -		return true;
>> 	/*
>> 	 * For dax vmas, try to always use hugepage mappings. If the kernel does
>> 	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
>> @@ -117,6 +125,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>> 	if (vma_is_dax(vma))
>> 		return true;
>> 
>> +	if (memcg_transparent_hugepage_disabled(vma))
>> +		return false;
>> +
>> +	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
>> +		return true;
>> +
>> 	if (transparent_hugepage_flags &
>> 				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
>> 		return !!(vma->vm_flags & VM_HUGEPAGE);
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index a7a0a1a5..abc3142 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -320,6 +320,8 @@ struct mem_cgroup {
>> 
>> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> 	struct deferred_split deferred_split_queue;
>> +
>> +	bool transparent_hugepage_disabled;
>> #endif
>> 
>> 	struct mem_cgroup_per_node *nodeinfo[0];
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 7a4bd8b..b6d91b6 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -5011,6 +5011,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>> 	if (parent) {
>> 		memcg->swappiness = mem_cgroup_swappiness(parent);
>> 		memcg->oom_kill_disable = parent->oom_kill_disable;
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +		memcg->transparent_hugepage_disabled
>> +			= parent->transparent_hugepage_disabled;
>> +#endif
>> +	} else {
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +		memcg->transparent_hugepage_disabled = false;
>> +#endif
>> 	}
>> 	if (parent && parent->use_hierarchy) {
>> 		memcg->use_hierarchy = true;
>> @@ -6126,6 +6134,24 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
>> 	return nbytes;
>> }
>> 
>> +static u64 transparent_hugepage_disabled_read(struct cgroup_subsys_state *css,
>> +					      struct cftype *cft)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	return memcg->transparent_hugepage_disabled;
>> +}
>> +
>> +static int transparent_hugepage_disabled_write(struct cgroup_subsys_state *css,
>> +					       struct cftype *cft, u64 val)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	memcg->transparent_hugepage_disabled = !!val;
>> +
>> +	return 0;
>> +}
>> +
>> static struct cftype memory_files[] = {
>> 	{
>> 		.name = "current",
>> @@ -6179,6 +6205,12 @@ static struct cftype memory_files[] = {
>> 		.seq_show = memory_oom_group_show,
>> 		.write = memory_oom_group_write,
>> 	},
>> +	{
>> +		.name = "transparent_hugepage_disabled",
>> +		.flags = CFTYPE_NOT_ON_ROOT,
>> +		.read_u64 = transparent_hugepage_disabled_read,
>> +		.write_u64 = transparent_hugepage_disabled_write,
>> +	},
>> 	{ }	/* terminate */
>> };
>> 
>> @@ -6787,6 +6819,16 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>> 	refill_stock(memcg, nr_pages);
>> }
>> 
>> +bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
>> +{
>> +	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
>> +
>> +	if (memcg && memcg->transparent_hugepage_disabled)
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> static int __init cgroup_memory(char *s)
>> {
>> 	char *token;
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index aad3ba7..253b63b 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1810,6 +1810,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>> 		goto alloc_nohuge;
>> 	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
>> 		goto alloc_nohuge;
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +	if (memcg_transparent_hugepage_disabled(vma))
>> +		goto alloc_nohuge;
>> +#endif
>> 	if (shmem_huge == SHMEM_HUGE_FORCE)
>> 		goto alloc_huge;
>> 	switch (sbinfo->huge) {
>> -- 
>> 2.7.4
> 
> -- 
> Michal Hocko
> SUSE Labs



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 12:30   ` teawater
@ 2020-03-24 13:17     ` Michal Hocko
  0 siblings, 0 replies; 7+ messages in thread
From: Michal Hocko @ 2020-03-24 13:17 UTC (permalink / raw)
  To: teawater
  Cc: Hui Zhu, Johannes Weiner, Vladimir Davydov, Andrew Morton, hughd,
	yang.shi, kirill, dan.j.williams, aneesh.kumar,
	sean.j.christopherson, thellstrom, guro, shakeelb, chris, tj,
	tglx, linux-kernel, cgroups, linux-mm

On Tue 24-03-20 20:30:32, teawater wrote:
> 
> 
> > 2020年3月24日 19:00,Michal Hocko <mhocko@kernel.org> 写道:
> > 
> > On Tue 24-03-20 18:31:56, Hui Zhu wrote:
> >> /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
> >> control if the application can use THP in system level.
> >> Sometime, we would not want an application use THP even if
> >> transparent_hugepage/enabled is set to "always" or "madvise" because
> >> thp may need more cpu and memory resources in some cases.
> > 
> > Could you specify that sometime by a real usecase in the memcg context
> > please?
> 
> 
> Thanks for your review.
> 
> We use thp+balloon to supply more memory flexibility for vm.
> https://lore.kernel.org/linux-mm/1584893097-12317-1-git-send-email-teawater@gmail.com/
> This is another thread that I am working around thp+balloon.
> 
> Other applications are already deployed on these machines.  The
> transparent_hugepage/enabled is set to never because they used to have
> a lot of THP related performance issues.  And some of them may call
> madvise thp with itself.

If they call madvise then they clearly indicate they prefer THP
regardless the cost. So I really fail to see what memcg specific tuning
brings in.

Could you be more specific about the usecase that cannot work with the
existing THP tuning?
-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 10:31 [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled Hui Zhu
  2020-03-24 11:00 ` Michal Hocko
  2020-03-24 12:17 ` Kirill A. Shutemov
@ 2020-03-24 13:23 ` Chris Down
  2 siblings, 0 replies; 7+ messages in thread
From: Chris Down @ 2020-03-24 13:23 UTC (permalink / raw)
  To: Hui Zhu
  Cc: hannes, mhocko, vdavydov.dev, akpm, hughd, yang.shi, kirill,
	dan.j.williams, aneesh.kumar, sean.j.christopherson, thellstrom,
	guro, shakeelb, tj, tglx, linux-kernel, cgroups, linux-mm,
	Hui Zhu

Hui Zhu writes:
>/sys/kernel/mm/transparent_hugepage/enabled is the only interface to
>control if the application can use THP in system level.
>Sometime, we would not want an application use THP even if
>transparent_hugepage/enabled is set to "always" or "madvise" because
>thp may need more cpu and memory resources in some cases.
>
>This commit add a new interface memory.transparent_hugepage_disabled
>in memcg.
>When it set to 1, the application inside the cgroup cannot use THP
>except dax.

I'm against this patch even in the abstract -- this adds an extremely niche use 
case to a general interface, and there are plenty of ways to already indicate 
THP preference.

Perhaps there is some new interface desirable for your use case, but adding a 
new global file to memcg certainly isn't it.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled
  2020-03-24 12:17 ` Kirill A. Shutemov
@ 2020-03-24 19:17   ` David Rientjes
  0 siblings, 0 replies; 7+ messages in thread
From: David Rientjes @ 2020-03-24 19:17 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Hui Zhu, hannes, mhocko, vdavydov.dev, akpm, hughd, yang.shi,
	dan.j.williams, aneesh.kumar, sean.j.christopherson, thellstrom,
	guro, shakeelb, chris, tj, tglx, linux-kernel, cgroups, linux-mm,
	Hui Zhu

On Tue, 24 Mar 2020, Kirill A. Shutemov wrote:

> On Tue, Mar 24, 2020 at 06:31:56PM +0800, Hui Zhu wrote:
> > /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
> > control if the application can use THP in system level.
> 
> No. We have prctl(PR_SET_THP_DISABLE) too.
> 

Yeah, I think this is the key point since this can be inherited across 
fork and thus you already have the ability to disable thp for a process 
where you cannot even modify the binary to do MADV_NOHUGEPAGE.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-03-24 19:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-24 10:31 [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled Hui Zhu
2020-03-24 11:00 ` Michal Hocko
2020-03-24 12:30   ` teawater
2020-03-24 13:17     ` Michal Hocko
2020-03-24 12:17 ` Kirill A. Shutemov
2020-03-24 19:17   ` David Rientjes
2020-03-24 13:23 ` Chris Down

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).