From: kernel test robot <lkp@intel.com>
To: Aaron Tomlin <atomlin@redhat.com>, linux-mm@kvack.org
Cc: kbuild-all@lists.01.org, akpm@linux-foundation.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mm/page_alloc: try oom if reclaim is unable to make forward progress
Date: Tue, 16 Mar 2021 03:54:31 +0800
Message-ID: <202103160339.z59TH5v7-lkp@intel.com>
In-Reply-To: <20210315165837.789593-1-atomlin@redhat.com>


Hi Aaron,

Thank you for the patch! However, there is still something to improve:

[auto build test ERROR on hnaz-linux-mm/master]

url:    https://github.com/0day-ci/linux/commits/Aaron-Tomlin/mm-page_alloc-try-oom-if-reclaim-is-unable-to-make-forward-progress/20210316-010203
base:   https://github.com/hnaz/linux-mm master
config: arc-randconfig-r024-20210315 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/77338aaff2606a7715c832545e79370e849e3b4e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Aaron-Tomlin/mm-page_alloc-try-oom-if-reclaim-is-unable-to-make-forward-progress/20210316-010203
        git checkout 77338aaff2606a7715c832545e79370e849e3b4e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 
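
A quick way to iterate on a fix is to rebuild only the affected object with
the same cross toolchain. This is a sketch, assuming kbuild's usual
single-object target works in this tree and that make.cross installed the
toolchain on the first run:

        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=arc mm/page_alloc.o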

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

All errors/warnings (new ones prefixed by >>):

   mm/page_alloc.c: In function 'should_reclaim_retry':
>> mm/page_alloc.c:4444:3: error: 'result' undeclared (first use in this function)
    4444 |   result false;
         |   ^~~~~~
   mm/page_alloc.c:4444:3: note: each undeclared identifier is reported only once for each function it appears in
>> mm/page_alloc.c:4444:9: error: expected ';' before 'false'
    4444 |   result false;
         |         ^~~~~~
         |         ;
>> mm/page_alloc.c:4447:50: error: expected ';' before 'for'
    4447 |   return unreserve_highatomic_pageblock(ac, true)
         |                                                  ^
         |                                                  ;
   mm/page_alloc.c:4426:18: warning: unused variable 'z' [-Wunused-variable]
    4426 |  struct zoneref *z;
         |                  ^
   mm/page_alloc.c:4425:15: warning: unused variable 'zone' [-Wunused-variable]
    4425 |  struct zone *zone;
         |               ^~~~
   mm/page_alloc.c: In function '__alloc_pages_slowpath':
>> mm/page_alloc.c:4720:11: error: expected ';' before ':' token
    4720 |   goto oom:
         |           ^
         |           ;
>> mm/page_alloc.c:4556:6: warning: variable 'compaction_retries' set but not used [-Wunused-but-set-variable]
    4556 |  int compaction_retries;
         |      ^~~~~~~~~~~~~~~~~~
   mm/page_alloc.c: At top level:
   mm/page_alloc.c:6136:23: warning: no previous prototype for 'memmap_init' [-Wmissing-prototypes]
    6136 | void __meminit __weak memmap_init(unsigned long size, int nid,
         |                       ^~~~~~~~~~~
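
All three errors look like small syntax slips rather than anything
structural. Below is a minimal sketch of corrections for the two errors in
should_reclaim_retry, assuming the intent at line 4444 was to stop retrying
and fall through to the OOM path (only the patch author can confirm that):

        if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
                /* presumably "result false;" was meant to end the retries */
                ret = false;
                goto out;
        }
        /* Last chance before OOM, try draining highatomic_reserve once */
        else if (*no_progress_loops == MAX_RECLAIM_RETRIES)
                return unreserve_highatomic_pageblock(ac, true); /* ';' was missing */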


vim +/result +4444 mm/page_alloc.c

  4409	
  4410	/*
  4411	 * Checks whether it makes sense to retry the reclaim to make forward progress
  4412	 * for the given allocation request.
  4413	 *
  4414	 * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
  4415	 * without success, or when we couldn't even meet the watermark if we
  4416	 * reclaimed all remaining pages on the LRU lists.
  4417	 *
  4418	 * Returns true if a retry is viable or false to enter the oom path.
  4419	 */
  4420	static inline bool
  4421	should_reclaim_retry(gfp_t gfp_mask, unsigned order,
  4422			     struct alloc_context *ac, int alloc_flags,
  4423			     bool did_some_progress, int *no_progress_loops)
  4424	{
  4425		struct zone *zone;
  4426		struct zoneref *z;
  4427		bool ret = false;
  4428	
  4429		/*
  4430		 * Costly allocations might have made progress, but this doesn't mean
  4431		 * their order will become available due to high fragmentation so
  4432		 * always increment the no progress counter for them
  4433		 */
  4434		if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
  4435			*no_progress_loops = 0;
  4436		else
  4437			(*no_progress_loops)++;
  4438	
  4439		/*
  4440		 * Make sure we converge to OOM if we cannot make any progress
  4441		 * several times in a row.
  4442		 */
  4443		if (*no_progress_loops > MAX_RECLAIM_RETRIES)
> 4444			result false;
  4445		/* Last chance before OOM, try draining highatomic_reserve once */
  4446		else if (*no_progress_loops == MAX_RECLAIM_RETRIES)
> 4447			return unreserve_highatomic_pageblock(ac, true)
  4448	
  4449		/*
  4450		 * Keep reclaiming pages while there is a chance this will lead
  4451		 * somewhere.  If none of the target zones can satisfy our allocation
  4452		 * request even if all reclaimable pages are considered then we are
  4453		 * screwed and have to go OOM.
  4454		 */
  4455		for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
  4456					ac->highest_zoneidx, ac->nodemask) {
  4457			unsigned long available;
  4458			unsigned long reclaimable;
  4459			unsigned long min_wmark = min_wmark_pages(zone);
  4460			bool wmark;
  4461	
  4462			available = reclaimable = zone_reclaimable_pages(zone);
  4463			available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  4464	
  4465			/*
  4466			 * Would the allocation succeed if we reclaimed all
  4467			 * reclaimable pages?
  4468			 */
  4469			wmark = __zone_watermark_ok(zone, order, min_wmark,
  4470					ac->highest_zoneidx, alloc_flags, available);
  4471			trace_reclaim_retry_zone(z, order, reclaimable,
  4472					available, min_wmark, *no_progress_loops, wmark);
  4473			if (wmark) {
  4474				/*
  4475				 * If we didn't make any progress and have a lot of
  4476				 * dirty + writeback pages then we should wait for
  4477				 * an IO to complete to slow down the reclaim and
  4478			 * prevent premature OOM
  4479				 */
  4480				if (!did_some_progress) {
  4481					unsigned long write_pending;
  4482	
  4483					write_pending = zone_page_state_snapshot(zone,
  4484								NR_ZONE_WRITE_PENDING);
  4485	
  4486					if (2 * write_pending > reclaimable) {
  4487						congestion_wait(BLK_RW_ASYNC, HZ/10);
  4488						return true;
  4489					}
  4490				}
  4491	
  4492				ret = true;
  4493				goto out;
  4494			}
  4495		}
  4496	
  4497	out:
  4498		/*
  4499		 * Memory allocation/reclaim might be called from a WQ context and the
  4500		 * current implementation of the WQ concurrency control doesn't
  4501		 * recognize that a particular WQ is congested if the worker thread is
  4502		 * looping without ever sleeping. Therefore we have to do a short sleep
  4503		 * here rather than calling cond_resched().
  4504		 */
  4505		if (current->flags & PF_WQ_WORKER)
  4506			schedule_timeout_uninterruptible(1);
  4507		else
  4508			cond_resched();
  4509		return ret;
  4510	}
  4511	
  4512	static inline bool
  4513	check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
  4514	{
  4515		/*
  4516		 * It's possible that cpuset's mems_allowed and the nodemask from
  4517		 * mempolicy don't intersect. This should be normally dealt with by
  4518		 * policy_nodemask(), but it's possible to race with cpuset update in
  4519		 * such a way the check therein was true, and then it became false
  4520		 * before we got our cpuset_mems_cookie here.
  4521		 * This assumes that for all allocations, ac->nodemask can come only
  4522		 * from MPOL_BIND mempolicy (whose documented semantics is to be ignored
  4523		 * when it does not intersect with the cpuset restrictions) or the
  4524		 * caller can deal with a violated nodemask.
  4525		 */
  4526		if (cpusets_enabled() && ac->nodemask &&
  4527				!cpuset_nodemask_valid_mems_allowed(ac->nodemask)) {
  4528			ac->nodemask = NULL;
  4529			return true;
  4530		}
  4531	
  4532		/*
  4533		 * When updating a task's mems_allowed or mempolicy nodemask, it is
  4534		 * possible to race with parallel threads in such a way that our
  4535		 * allocation can fail while the mask is being updated. If we are about
  4536		 * to fail, check if the cpuset changed during allocation and if so,
  4537		 * retry.
  4538		 */
  4539		if (read_mems_allowed_retry(cpuset_mems_cookie))
  4540			return true;
  4541	
  4542		return false;
  4543	}
  4544	
  4545	static inline struct page *
  4546	__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
  4547							struct alloc_context *ac)
  4548	{
  4549		bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
  4550		const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
  4551		struct page *page = NULL;
  4552		unsigned int alloc_flags;
  4553		unsigned long did_some_progress;
  4554		enum compact_priority compact_priority;
  4555		enum compact_result compact_result;
> 4556		int compaction_retries;
  4557		int no_progress_loops;
  4558		unsigned int cpuset_mems_cookie;
  4559		int reserve_flags;
  4560	
  4561		/*
  4562		 * We also sanity check to catch abuse of atomic reserves being used by
  4563		 * callers that are not in atomic context.
  4564		 */
  4565		if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
  4566					(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
  4567			gfp_mask &= ~__GFP_ATOMIC;
  4568	
  4569	retry_cpuset:
  4570		compaction_retries = 0;
  4571		no_progress_loops = 0;
  4572		compact_priority = DEF_COMPACT_PRIORITY;
  4573		cpuset_mems_cookie = read_mems_allowed_begin();
  4574	
  4575		/*
  4576		 * The fast path uses conservative alloc_flags to succeed only until
  4577		 * kswapd needs to be woken up, and to avoid the cost of setting up
  4578		 * alloc_flags precisely. So we do that now.
  4579		 */
  4580		alloc_flags = gfp_to_alloc_flags(gfp_mask);
  4581	
  4582		/*
  4583		 * We need to recalculate the starting point for the zonelist iterator
  4584		 * because we might have used different nodemask in the fast path, or
  4585		 * there was a cpuset modification and we are retrying - otherwise we
  4586		 * could end up iterating over non-eligible zones endlessly.
  4587		 */
  4588		ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
  4589						ac->highest_zoneidx, ac->nodemask);
  4590		if (!ac->preferred_zoneref->zone)
  4591			goto nopage;
  4592	
  4593		if (alloc_flags & ALLOC_KSWAPD)
  4594			wake_all_kswapds(order, gfp_mask, ac);
  4595	
  4596		/*
  4597		 * The adjusted alloc_flags might result in immediate success, so try
  4598		 * that first
  4599		 */
  4600		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
  4601		if (page)
  4602			goto got_pg;
  4603	
  4604		/*
  4605		 * For costly allocations, try direct compaction first, as it's likely
  4606		 * that we have enough base pages and don't need to reclaim. For non-
  4607		 * movable high-order allocations, do that as well, as compaction will
  4608		 * try prevent permanent fragmentation by migrating from blocks of the
  4609		 * same migratetype.
  4610		 * Don't try this for allocations that are allowed to ignore
  4611		 * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
  4612		 */
  4613		if (can_direct_reclaim &&
  4614				(costly_order ||
  4615				   (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
  4616				&& !gfp_pfmemalloc_allowed(gfp_mask)) {
  4617			page = __alloc_pages_direct_compact(gfp_mask, order,
  4618							alloc_flags, ac,
  4619							INIT_COMPACT_PRIORITY,
  4620							&compact_result);
  4621			if (page)
  4622				goto got_pg;
  4623	
  4624			/*
  4625			 * Checks for costly allocations with __GFP_NORETRY, which
  4626			 * includes some THP page fault allocations
  4627			 */
  4628			if (costly_order && (gfp_mask & __GFP_NORETRY)) {
  4629				/*
  4630				 * If allocating entire pageblock(s) and compaction
  4631				 * failed because all zones are below low watermarks
  4632				 * or is prohibited because it recently failed at this
  4633				 * order, fail immediately unless the allocator has
  4634				 * requested compaction and reclaim retry.
  4635				 *
  4636				 * Reclaim is
  4637				 *  - potentially very expensive because zones are far
  4638				 *    below their low watermarks or this is part of very
  4639				 *    bursty high order allocations,
  4640				 *  - not guaranteed to help because isolate_freepages()
  4641				 *    may not iterate over freed pages as part of its
  4642				 *    linear scan, and
  4643				 *  - unlikely to make entire pageblocks free on its
  4644				 *    own.
  4645				 */
  4646				if (compact_result == COMPACT_SKIPPED ||
  4647				    compact_result == COMPACT_DEFERRED)
  4648					goto nopage;
  4649	
  4650				/*
  4651				 * Looks like reclaim/compaction is worth trying, but
  4652				 * sync compaction could be very expensive, so keep
  4653				 * using async compaction.
  4654				 */
  4655				compact_priority = INIT_COMPACT_PRIORITY;
  4656			}
  4657		}
  4658	
  4659	retry:
  4660		/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
  4661		if (alloc_flags & ALLOC_KSWAPD)
  4662			wake_all_kswapds(order, gfp_mask, ac);
  4663	
  4664		reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
  4665		if (reserve_flags)
  4666			alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
  4667	
  4668		/*
  4669		 * Reset the nodemask and zonelist iterators if memory policies can be
  4670		 * ignored. These allocations are high priority and system rather than
  4671		 * user oriented.
  4672		 */
  4673		if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
  4674			ac->nodemask = NULL;
  4675			ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
  4676						ac->highest_zoneidx, ac->nodemask);
  4677		}
  4678	
  4679		/* Attempt with potentially adjusted zonelist and alloc_flags */
  4680		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
  4681		if (page)
  4682			goto got_pg;
  4683	
  4684		/* Caller is not willing to reclaim, we can't balance anything */
  4685		if (!can_direct_reclaim)
  4686			goto nopage;
  4687	
  4688		/* Avoid recursion of direct reclaim */
  4689		if (current->flags & PF_MEMALLOC)
  4690			goto nopage;
  4691	
  4692		/* Try direct reclaim and then allocating */
  4693		page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
  4694								&did_some_progress);
  4695		if (page)
  4696			goto got_pg;
  4697	
  4698		/* Try direct compaction and then allocating */
  4699		page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
  4700						compact_priority, &compact_result);
  4701		if (page)
  4702			goto got_pg;
  4703	
  4704		/* Do not loop if specifically requested */
  4705		if (gfp_mask & __GFP_NORETRY)
  4706			goto nopage;
  4707	
  4708		/*
  4709		 * Do not retry costly high order allocations unless they are
  4710		 * __GFP_RETRY_MAYFAIL
  4711		 */
  4712		if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
  4713			goto nopage;
  4714	
  4715		if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
  4716					 did_some_progress > 0, &no_progress_loops))
  4717			goto retry;
  4718	
  4719		if (should_try_oom(no_progress_loops, compact_result))
  4720			goto oom:
  4721		/*
  4722		 * It doesn't make any sense to retry for the compaction if the order-0
  4723		 * reclaim is not able to make any progress because the current
  4724		 * implementation of the compaction depends on the sufficient amount
  4725		 * of free memory (see __compaction_suitable)
  4726		 */
  4727		if (did_some_progress > 0 &&
  4728				should_compact_retry(ac, order, alloc_flags,
  4729					compact_result, &compact_priority,
  4730					&compaction_retries))
  4731			goto retry;
  4732	
  4733	
  4734		/* Deal with possible cpuset update races before we start OOM killing */
  4735		if (check_retry_cpuset(cpuset_mems_cookie, ac))
  4736			goto retry_cpuset;
  4737	
  4738	oom:
  4739		/* Reclaim has failed us, start killing things */
  4740		page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
  4741		if (page)
  4742			goto got_pg;
  4743	
  4744		/* Avoid allocations with no watermarks from looping endlessly */
  4745		if (tsk_is_oom_victim(current) &&
  4746		    (alloc_flags & ALLOC_OOM ||
  4747		     (gfp_mask & __GFP_NOMEMALLOC)))
  4748			goto nopage;
  4749	
  4750		/* Retry as long as the OOM killer is making progress */
  4751		if (did_some_progress) {
  4752			no_progress_loops = 0;
  4753			goto retry;
  4754		}
  4755	
  4756	nopage:
  4757		/* Deal with possible cpuset update races before we fail */
  4758		if (check_retry_cpuset(cpuset_mems_cookie, ac))
  4759			goto retry_cpuset;
  4760	
  4761		/*
  4762		 * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
  4763		 * we always retry
  4764		 */
  4765		if (gfp_mask & __GFP_NOFAIL) {
  4766			/*
  4767			 * All existing users of the __GFP_NOFAIL are blockable, so warn
  4768			 * of any new users that actually require GFP_NOWAIT
  4769			 */
  4770			if (WARN_ON_ONCE(!can_direct_reclaim))
  4771				goto fail;
  4772	
  4773			/*
  4774			 * PF_MEMALLOC request from this context is rather bizarre
  4775		 * because we cannot reclaim anything and can only loop waiting
  4776		 * for somebody to do work for us
  4777			 */
  4778			WARN_ON_ONCE(current->flags & PF_MEMALLOC);
  4779	
  4780			/*
  4781		 * Non-failing costly orders are a hard requirement which we
  4782		 * are not well prepared for, so let's warn about these users
  4783			 * so that we can identify them and convert them to something
  4784			 * else.
  4785			 */
  4786			WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
  4787	
  4788			/*
  4789			 * Help non-failing allocations by giving them access to memory
  4790			 * reserves but do not use ALLOC_NO_WATERMARKS because this
  4791			 * could deplete whole memory reserves which would just make
  4792			 * the situation worse
  4793			 */
  4794			page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
  4795			if (page)
  4796				goto got_pg;
  4797	
  4798			cond_resched();
  4799			goto retry;
  4800		}
  4801	fail:
  4802		warn_alloc(gfp_mask, ac->nodemask,
  4803				"page allocation failure: order:%u", order);
  4804	got_pg:
  4805		return page;
  4806	}
  4807	
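
The remaining error, in __alloc_pages_slowpath at line 4720, is just a colon
where a semicolon belongs:

        if (should_try_oom(no_progress_loops, compact_result))
                goto oom;       /* was "goto oom:" */

Once these three syntax errors are fixed, the warnings about 'z', 'zone' and
'compaction_retries' being unused should most likely disappear as well: they
appear to be fall-out from the parser abandoning the broken statements, not
genuinely dead code.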

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 22374 bytes --]
