linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: optimize memory allocation
@ 2021-04-12  7:49 ultrachin
  2021-04-12 11:28 ` kernel test robot
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: ultrachin @ 2021-04-12  7:49 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, hannes, mhocko, vdavydov.dev, linux-mm, cgroups,
	Chen Xiaoguang, Chen He

From: Chen Xiaoguang <xiaoggchen@tencent.com>

Check memory cgroup limit before allocating real memory. This may
improve performance especially in slow path when memory allocation
exceeds cgroup limitation.

Signed-off-by: Chen Xiaoguang <xiaoggchen@tencent.com>
Signed-off-by: Chen He <heddchen@tencent.com>
---
 include/linux/memcontrol.h | 30 ++++++++++++++++++++++--------
 mm/memcontrol.c            | 34 ++++++++++++++++------------------
 mm/page_alloc.c            | 24 +++++++++++++++++-------
 3 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c04d39..59bb3ba 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1583,8 +1583,9 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
-int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
-void __memcg_kmem_uncharge_page(struct page *page, int order);
+int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order);
+void __memcg_kmem_uncharge_page(struct page *page, int order,
+				struct mem_cgroup *memcg);
 
 struct obj_cgroup *get_obj_cgroup_from_current(void);
 
@@ -1610,18 +1611,30 @@ static inline bool memcg_kmem_enabled(void)
 	return static_branch_likely(&memcg_kmem_enabled_key);
 }
 
+extern struct mem_cgroup *get_mem_cgroup_from_current(void);
+
 static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
 					 int order)
 {
-	if (memcg_kmem_enabled())
-		return __memcg_kmem_charge_page(page, gfp, order);
-	return 0;
+	struct mem_cgroup *memcg;
+	int ret = 0;
+
+	memcg = get_mem_cgroup_from_current();
+	if (memcg && memcg_kmem_enabled() && !mem_cgroup_is_root(memcg)) {
+		ret = __memcg_kmem_charge_page(memcg, gfp, order);
+		if (!ret) {
+			page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
+			return 0;
+		}
+		css_put(&memcg->css);
+	}
+	return ret;
 }
 
 static inline void memcg_kmem_uncharge_page(struct page *page, int order)
 {
 	if (memcg_kmem_enabled())
-		__memcg_kmem_uncharge_page(page, order);
+		__memcg_kmem_uncharge_page(page, order, NULL);
 }
 
 /*
@@ -1647,13 +1660,14 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
 {
 }
 
-static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp,
+static inline int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp,
 					   int order)
 {
 	return 0;
 }
 
-static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
+static inline void __memcg_kmem_uncharge_page(struct page *page, int order,
+					    struct mem_cgroup *memcg)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e064ac0d..8df57b7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1085,7 +1085,7 @@ static __always_inline bool memcg_kmem_bypass(void)
 /**
  * If active memcg is set, do not fallback to current->mm->memcg.
  */
-static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+struct mem_cgroup *get_mem_cgroup_from_current(void)
 {
 	if (memcg_kmem_bypass())
 		return NULL;
@@ -3113,21 +3113,11 @@ static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_page
  *
  * Returns 0 on success, an error code on failure.
  */
-int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
+int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order)
 {
-	struct mem_cgroup *memcg;
-	int ret = 0;
+	int ret;
 
-	memcg = get_mem_cgroup_from_current();
-	if (memcg && !mem_cgroup_is_root(memcg)) {
-		ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
-		if (!ret) {
-			page->memcg_data = (unsigned long)memcg |
-				MEMCG_DATA_KMEM;
-			return 0;
-		}
-		css_put(&memcg->css);
-	}
+	ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
 	return ret;
 }
 
@@ -3136,17 +3126,25 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
  * @page: page to uncharge
  * @order: allocation order
  */
-void __memcg_kmem_uncharge_page(struct page *page, int order)
+void __memcg_kmem_uncharge_page(struct page *page, int order,
+		struct mem_cgroup *memcg_in)
 {
-	struct mem_cgroup *memcg = page_memcg(page);
+	struct mem_cgroup *memcg;
 	unsigned int nr_pages = 1 << order;
 
+	if (page)
+		memcg = page_memcg(page);
+	else
+		memcg = memcg_in;
+
 	if (!memcg)
 		return;
 
-	VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
+	if (page)
+		VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
 	__memcg_kmem_uncharge(memcg, nr_pages);
-	page->memcg_data = 0;
+	if (page)
+		page->memcg_data = 0;
 	css_put(&memcg->css);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cfc7287..c3d1d0c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1230,7 +1230,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 		 * Untie memcg state and reset page's owner
 		 */
 		if (memcg_kmem_enabled() && PageMemcgKmem(page))
-			__memcg_kmem_uncharge_page(page, order);
+			__memcg_kmem_uncharge_page(page, order, NULL);
 		reset_page_owner(page, order);
 		return false;
 	}
@@ -1260,7 +1260,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
 	if (memcg_kmem_enabled() && PageMemcgKmem(page))
-		__memcg_kmem_uncharge_page(page, order);
+		__memcg_kmem_uncharge_page(page, order, NULL);
 	if (check_free)
 		bad += check_free_page(page);
 	if (bad)
@@ -4976,6 +4976,8 @@ struct page *
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = { };
+	struct mem_cgroup *memcg;
+	bool charged = false;
 
 	/*
 	 * There are several places where we assume that the order value is sane
@@ -4987,6 +4989,15 @@ struct page *
 	}
 
 	gfp_mask &= gfp_allowed_mask;
+	memcg = get_mem_cgroup_from_current();
+	if (memcg && memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
+	    !mem_cgroup_is_root(memcg)) {
+		if (unlikely(__memcg_kmem_charge_page(memcg, gfp_mask, order) != 0)) {
+			css_put(&memcg->css);
+			return NULL;
+		}
+		charged = true;
+	}
 	alloc_mask = gfp_mask;
 	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
 		return NULL;
@@ -5020,11 +5031,10 @@ struct page *
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
 out:
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
-	    unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
-		__free_pages(page, order);
-		page = NULL;
-	}
+	if (page && charged)
+		page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
+	else if (charged)
+		__memcg_kmem_uncharge_page(NULL, order, memcg);
 
 	trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
 
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: optimize memory allocation
  2021-04-12  7:49 [PATCH] mm: optimize memory allocation ultrachin
@ 2021-04-12 11:28 ` kernel test robot
  2021-04-12 11:36 ` kernel test robot
  2021-04-13  6:56 ` Michal Hocko
  2 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2021-04-12 11:28 UTC (permalink / raw)
  To: ultrachin, linux-kernel
  Cc: kbuild-all, akpm, hannes, mhocko, vdavydov.dev, linux-mm,
	cgroups, Chen Xiaoguang, Chen He

[-- Attachment #1: Type: text/plain, Size: 6033 bytes --]

Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master v5.12-rc7]
[cannot apply to hnaz-linux-mm/master next-20210409]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/ultrachin-163-com/mm-optimize-memory-allocation/20210412-155259
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: openrisc-randconfig-r032-20210412 (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/6994280da115271cf4083439e5d4dcdb3ce00720
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review ultrachin-163-com/mm-optimize-memory-allocation/20210412-155259
        git checkout 6994280da115271cf4083439e5d4dcdb3ce00720
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/page_alloc.c:3605:15: warning: no previous prototype for 'should_fail_alloc_page' [-Wmissing-prototypes]
    3605 | noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
         |               ^~~~~~~~~~~~~~~~~~~~~~
   mm/page_alloc.c: In function '__alloc_pages_nodemask':
   mm/page_alloc.c:4992:10: error: implicit declaration of function 'get_mem_cgroup_from_current'; did you mean 'get_mem_cgroup_from_mm'? [-Werror=implicit-function-declaration]
    4992 |  memcg = get_mem_cgroup_from_current();
         |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~
         |          get_mem_cgroup_from_mm
   mm/page_alloc.c:4992:8: warning: assignment to 'struct mem_cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    4992 |  memcg = get_mem_cgroup_from_current();
         |        ^
>> mm/page_alloc.c:4996:18: error: dereferencing pointer to incomplete type 'struct mem_cgroup'
    4996 |    css_put(&memcg->css);
         |                  ^~
>> mm/page_alloc.c:5035:7: error: 'struct page' has no member named 'memcg_data'
    5035 |   page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
         |       ^~
>> mm/page_alloc.c:5035:45: error: 'MEMCG_DATA_KMEM' undeclared (first use in this function)
    5035 |   page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
         |                                             ^~~~~~~~~~~~~~~
   mm/page_alloc.c:5035:45: note: each undeclared identifier is reported only once for each function it appears in
   cc1: some warnings being treated as errors


vim +4996 mm/page_alloc.c

  4967	
  4968	/*
  4969	 * This is the 'heart' of the zoned buddy allocator.
  4970	 */
  4971	struct page *
  4972	__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
  4973								nodemask_t *nodemask)
  4974	{
  4975		struct page *page;
  4976		unsigned int alloc_flags = ALLOC_WMARK_LOW;
  4977		gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
  4978		struct alloc_context ac = { };
  4979		struct mem_cgroup *memcg;
  4980		bool charged = false;
  4981	
  4982		/*
  4983		 * There are several places where we assume that the order value is sane
  4984		 * so bail out early if the request is out of bound.
  4985		 */
  4986		if (unlikely(order >= MAX_ORDER)) {
  4987			WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
  4988			return NULL;
  4989		}
  4990	
  4991		gfp_mask &= gfp_allowed_mask;
> 4992		memcg = get_mem_cgroup_from_current();
  4993		if (memcg && memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
  4994		    !mem_cgroup_is_root(memcg)) {
  4995			if (unlikely(__memcg_kmem_charge_page(memcg, gfp_mask, order) != 0)) {
> 4996				css_put(&memcg->css);
  4997				return NULL;
  4998			}
  4999			charged = true;
  5000		}
  5001		alloc_mask = gfp_mask;
  5002		if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
  5003			return NULL;
  5004	
  5005		/*
  5006		 * Forbid the first pass from falling back to types that fragment
  5007		 * memory until all local zones are considered.
  5008		 */
  5009		alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
  5010	
  5011		/* First allocation attempt */
  5012		page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
  5013		if (likely(page))
  5014			goto out;
  5015	
  5016		/*
  5017		 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
  5018		 * resp. GFP_NOIO which has to be inherited for all allocation requests
  5019		 * from a particular context which has been marked by
  5020		 * memalloc_no{fs,io}_{save,restore}.
  5021		 */
  5022		alloc_mask = current_gfp_context(gfp_mask);
  5023		ac.spread_dirty_pages = false;
  5024	
  5025		/*
  5026		 * Restore the original nodemask if it was potentially replaced with
  5027		 * &cpuset_current_mems_allowed to optimize the fast-path attempt.
  5028		 */
  5029		ac.nodemask = nodemask;
  5030	
  5031		page = __alloc_pages_slowpath(alloc_mask, order, &ac);
  5032	
  5033	out:
  5034		if (page && charged)
> 5035			page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
  5036		else if (charged)
  5037			__memcg_kmem_uncharge_page(NULL, order, memcg);
  5038	
  5039		trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
  5040	
  5041		return page;
  5042	}
  5043	EXPORT_SYMBOL(__alloc_pages_nodemask);
  5044	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 21562 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: optimize memory allocation
  2021-04-12  7:49 [PATCH] mm: optimize memory allocation ultrachin
  2021-04-12 11:28 ` kernel test robot
@ 2021-04-12 11:36 ` kernel test robot
  2021-04-13  6:56 ` Michal Hocko
  2 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2021-04-12 11:36 UTC (permalink / raw)
  To: ultrachin, linux-kernel
  Cc: kbuild-all, akpm, hannes, mhocko, vdavydov.dev, linux-mm,
	cgroups, Chen Xiaoguang, Chen He

[-- Attachment #1: Type: text/plain, Size: 5396 bytes --]

Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master v5.12-rc7]
[cannot apply to hnaz-linux-mm/master next-20210409]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/ultrachin-163-com/mm-optimize-memory-allocation/20210412-155259
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: i386-randconfig-r036-20210412 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/6994280da115271cf4083439e5d4dcdb3ce00720
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review ultrachin-163-com/mm-optimize-memory-allocation/20210412-155259
        git checkout 6994280da115271cf4083439e5d4dcdb3ce00720
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   mm/page_alloc.c:3605:15: warning: no previous prototype for 'should_fail_alloc_page' [-Wmissing-prototypes]
    3605 | noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
         |               ^~~~~~~~~~~~~~~~~~~~~~
   mm/page_alloc.c: In function '__alloc_pages_nodemask':
>> mm/page_alloc.c:4992:10: error: implicit declaration of function 'get_mem_cgroup_from_current'; did you mean 'get_mem_cgroup_from_mm'? [-Werror=implicit-function-declaration]
    4992 |  memcg = get_mem_cgroup_from_current();
         |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~
         |          get_mem_cgroup_from_mm
>> mm/page_alloc.c:4992:8: warning: assignment to 'struct mem_cgroup *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    4992 |  memcg = get_mem_cgroup_from_current();
         |        ^
   cc1: some warnings being treated as errors
--
>> mm/memcontrol.c:1088:20: warning: no previous prototype for 'get_mem_cgroup_from_current' [-Wmissing-prototypes]
    1088 | struct mem_cgroup *get_mem_cgroup_from_current(void)
         |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +4992 mm/page_alloc.c

  4967	
  4968	/*
  4969	 * This is the 'heart' of the zoned buddy allocator.
  4970	 */
  4971	struct page *
  4972	__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
  4973								nodemask_t *nodemask)
  4974	{
  4975		struct page *page;
  4976		unsigned int alloc_flags = ALLOC_WMARK_LOW;
  4977		gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
  4978		struct alloc_context ac = { };
  4979		struct mem_cgroup *memcg;
  4980		bool charged = false;
  4981	
  4982		/*
  4983		 * There are several places where we assume that the order value is sane
  4984		 * so bail out early if the request is out of bound.
  4985		 */
  4986		if (unlikely(order >= MAX_ORDER)) {
  4987			WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
  4988			return NULL;
  4989		}
  4990	
  4991		gfp_mask &= gfp_allowed_mask;
> 4992		memcg = get_mem_cgroup_from_current();
  4993		if (memcg && memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
  4994		    !mem_cgroup_is_root(memcg)) {
  4995			if (unlikely(__memcg_kmem_charge_page(memcg, gfp_mask, order) != 0)) {
  4996				css_put(&memcg->css);
  4997				return NULL;
  4998			}
  4999			charged = true;
  5000		}
  5001		alloc_mask = gfp_mask;
  5002		if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
  5003			return NULL;
  5004	
  5005		/*
  5006		 * Forbid the first pass from falling back to types that fragment
  5007		 * memory until all local zones are considered.
  5008		 */
  5009		alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
  5010	
  5011		/* First allocation attempt */
  5012		page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
  5013		if (likely(page))
  5014			goto out;
  5015	
  5016		/*
  5017		 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
  5018		 * resp. GFP_NOIO which has to be inherited for all allocation requests
  5019		 * from a particular context which has been marked by
  5020		 * memalloc_no{fs,io}_{save,restore}.
  5021		 */
  5022		alloc_mask = current_gfp_context(gfp_mask);
  5023		ac.spread_dirty_pages = false;
  5024	
  5025		/*
  5026		 * Restore the original nodemask if it was potentially replaced with
  5027		 * &cpuset_current_mems_allowed to optimize the fast-path attempt.
  5028		 */
  5029		ac.nodemask = nodemask;
  5030	
  5031		page = __alloc_pages_slowpath(alloc_mask, order, &ac);
  5032	
  5033	out:
  5034		if (page && charged)
  5035			page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
  5036		else if (charged)
  5037			__memcg_kmem_uncharge_page(NULL, order, memcg);
  5038	
  5039		trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
  5040	
  5041		return page;
  5042	}
  5043	EXPORT_SYMBOL(__alloc_pages_nodemask);
  5044	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 34924 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] mm: optimize memory allocation
  2021-04-12  7:49 [PATCH] mm: optimize memory allocation ultrachin
  2021-04-12 11:28 ` kernel test robot
  2021-04-12 11:36 ` kernel test robot
@ 2021-04-13  6:56 ` Michal Hocko
  2 siblings, 0 replies; 4+ messages in thread
From: Michal Hocko @ 2021-04-13  6:56 UTC (permalink / raw)
  To: ultrachin
  Cc: linux-kernel, akpm, hannes, vdavydov.dev, linux-mm, cgroups,
	Chen Xiaoguang, Chen He

On Mon 12-04-21 15:49:53, ultrachin@163.com wrote:
> From: Chen Xiaoguang <xiaoggchen@tencent.com>
> 
> Check memory cgroup limit before allocating real memory. This may
> improve performance especially in slow path when memory allocation
> exceeds cgroup limitation.

I would be really curious about any actual numbers because I have really
hard times to see scenarios when this would lead to an improvement.
Effectitelly only non-oom allocations would benefit theoretically (e.g.
atomic or GFP_NORETRY etc). All others will trigger the memcg oom killer
to help forward progress.

Besides that I really dislike kmem and LRU pages to be handled
differently so for that reason
Nacked-by: Michal Hocko <mhocko@suse.com>

If the optimization really can be provent then the patch would require
to be much more invasive.

> Signed-off-by: Chen Xiaoguang <xiaoggchen@tencent.com>
> Signed-off-by: Chen He <heddchen@tencent.com>
> ---
>  include/linux/memcontrol.h | 30 ++++++++++++++++++++++--------
>  mm/memcontrol.c            | 34 ++++++++++++++++------------------
>  mm/page_alloc.c            | 24 +++++++++++++++++-------
>  3 files changed, 55 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 0c04d39..59bb3ba 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1583,8 +1583,9 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
>  #endif
>  
>  #ifdef CONFIG_MEMCG_KMEM
> -int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
> -void __memcg_kmem_uncharge_page(struct page *page, int order);
> +int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order);
> +void __memcg_kmem_uncharge_page(struct page *page, int order,
> +				struct mem_cgroup *memcg);
>  
>  struct obj_cgroup *get_obj_cgroup_from_current(void);
>  
> @@ -1610,18 +1611,30 @@ static inline bool memcg_kmem_enabled(void)
>  	return static_branch_likely(&memcg_kmem_enabled_key);
>  }
>  
> +extern struct mem_cgroup *get_mem_cgroup_from_current(void);
> +
>  static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
>  					 int order)
>  {
> -	if (memcg_kmem_enabled())
> -		return __memcg_kmem_charge_page(page, gfp, order);
> -	return 0;
> +	struct mem_cgroup *memcg;
> +	int ret = 0;
> +
> +	memcg = get_mem_cgroup_from_current();
> +	if (memcg && memcg_kmem_enabled() && !mem_cgroup_is_root(memcg)) {
> +		ret = __memcg_kmem_charge_page(memcg, gfp, order);
> +		if (!ret) {
> +			page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
> +			return 0;
> +		}
> +		css_put(&memcg->css);
> +	}
> +	return ret;
>  }
>  
>  static inline void memcg_kmem_uncharge_page(struct page *page, int order)
>  {
>  	if (memcg_kmem_enabled())
> -		__memcg_kmem_uncharge_page(page, order);
> +		__memcg_kmem_uncharge_page(page, order, NULL);
>  }
>  
>  /*
> @@ -1647,13 +1660,14 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
>  {
>  }
>  
> -static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp,
> +static inline int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp,
>  					   int order)
>  {
>  	return 0;
>  }
>  
> -static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
> +static inline void __memcg_kmem_uncharge_page(struct page *page, int order,
> +					    struct mem_cgroup *memcg)
>  {
>  }
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index e064ac0d..8df57b7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1085,7 +1085,7 @@ static __always_inline bool memcg_kmem_bypass(void)
>  /**
>   * If active memcg is set, do not fallback to current->mm->memcg.
>   */
> -static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
> +struct mem_cgroup *get_mem_cgroup_from_current(void)
>  {
>  	if (memcg_kmem_bypass())
>  		return NULL;
> @@ -3113,21 +3113,11 @@ static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_page
>   *
>   * Returns 0 on success, an error code on failure.
>   */
> -int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> +int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order)
>  {
> -	struct mem_cgroup *memcg;
> -	int ret = 0;
> +	int ret;
>  
> -	memcg = get_mem_cgroup_from_current();
> -	if (memcg && !mem_cgroup_is_root(memcg)) {
> -		ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
> -		if (!ret) {
> -			page->memcg_data = (unsigned long)memcg |
> -				MEMCG_DATA_KMEM;
> -			return 0;
> -		}
> -		css_put(&memcg->css);
> -	}
> +	ret = __memcg_kmem_charge(memcg, gfp, 1 << order);
>  	return ret;
>  }
>  
> @@ -3136,17 +3126,25 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>   * @page: page to uncharge
>   * @order: allocation order
>   */
> -void __memcg_kmem_uncharge_page(struct page *page, int order)
> +void __memcg_kmem_uncharge_page(struct page *page, int order,
> +		struct mem_cgroup *memcg_in)
>  {
> -	struct mem_cgroup *memcg = page_memcg(page);
> +	struct mem_cgroup *memcg;
>  	unsigned int nr_pages = 1 << order;
>  
> +	if (page)
> +		memcg = page_memcg(page);
> +	else
> +		memcg = memcg_in;
> +
>  	if (!memcg)
>  		return;
>  
> -	VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
> +	if (page)
> +		VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
>  	__memcg_kmem_uncharge(memcg, nr_pages);
> -	page->memcg_data = 0;
> +	if (page)
> +		page->memcg_data = 0;
>  	css_put(&memcg->css);
>  }
>  
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index cfc7287..c3d1d0c 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1230,7 +1230,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
>  		 * Untie memcg state and reset page's owner
>  		 */
>  		if (memcg_kmem_enabled() && PageMemcgKmem(page))
> -			__memcg_kmem_uncharge_page(page, order);
> +			__memcg_kmem_uncharge_page(page, order, NULL);
>  		reset_page_owner(page, order);
>  		return false;
>  	}
> @@ -1260,7 +1260,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
>  	if (PageMappingFlags(page))
>  		page->mapping = NULL;
>  	if (memcg_kmem_enabled() && PageMemcgKmem(page))
> -		__memcg_kmem_uncharge_page(page, order);
> +		__memcg_kmem_uncharge_page(page, order, NULL);
>  	if (check_free)
>  		bad += check_free_page(page);
>  	if (bad)
> @@ -4976,6 +4976,8 @@ struct page *
>  	unsigned int alloc_flags = ALLOC_WMARK_LOW;
>  	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
>  	struct alloc_context ac = { };
> +	struct mem_cgroup *memcg;
> +	bool charged = false;
>  
>  	/*
>  	 * There are several places where we assume that the order value is sane
> @@ -4987,6 +4989,15 @@ struct page *
>  	}
>  
>  	gfp_mask &= gfp_allowed_mask;
> +	memcg = get_mem_cgroup_from_current();
> +	if (memcg && memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
> +	    !mem_cgroup_is_root(memcg)) {
> +		if (unlikely(__memcg_kmem_charge_page(memcg, gfp_mask, order) != 0)) {
> +			css_put(&memcg->css);
> +			return NULL;
> +		}
> +		charged = true;
> +	}
>  	alloc_mask = gfp_mask;
>  	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
>  		return NULL;
> @@ -5020,11 +5031,10 @@ struct page *
>  	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
>  
>  out:
> -	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
> -	    unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
> -		__free_pages(page, order);
> -		page = NULL;
> -	}
> +	if (page && charged)
> +		page->memcg_data = (unsigned long)memcg | MEMCG_DATA_KMEM;
> +	else if (charged)
> +		__memcg_kmem_uncharge_page(NULL, order, memcg);
>  
>  	trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
>  
> -- 
> 1.8.3.1
> 

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-04-13  6:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-12  7:49 [PATCH] mm: optimize memory allocation ultrachin
2021-04-12 11:28 ` kernel test robot
2021-04-12 11:36 ` kernel test robot
2021-04-13  6:56 ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).