linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Vlastimil Babka <vbabka@suse.cz>
To: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>,
	Michal Hocko <mhocko@kernel.org>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	l.roehrs@profihost.ag, cgroups@vger.kernel.org,
	Johannes Weiner <hannes@cmpxchg.org>
Subject: Re: lot of MemAvailable but falling cache and raising PSI
Date: Tue, 22 Oct 2019 12:21:24 +0200	[thread overview]
Message-ID: <f533e547-996d-5a05-5b25-92862c8f9057@suse.cz> (raw)
In-Reply-To: <1430bb64-ef9b-f6a1-fb2c-1ca351e7950e@profihost.ag>

On 10/22/19 12:02 PM, Stefan Priebe - Profihost AG wrote:
> 
> Am 22.10.19 um 09:48 schrieb Vlastimil Babka:
>> On 10/22/19 9:41 AM, Stefan Priebe - Profihost AG wrote:
>>>> Hi, could you try the patch below? I suspect you're hitting a corner
>>>> case where compaction_suitable() returns COMPACT_SKIPPED for the
>>>> ZONE_DMA, triggering reclaim even if other zones have plenty of free
>>>> memory. And should_continue_reclaim() then returns true until twice the
>>>> requested page size is reclaimed (compact_gap()). That means 4MB
>>>> reclaimed for each THP allocation attempt, which roughly matches the
>>>> trace data you preovided previously.
>>>>
>>>> The amplification to 4MB should be removed in patches merged for 5.4, so
>>>> it would be only 32 pages reclaimed per THP allocation. The patch below
>>>> tries to remove this corner case completely, and it should be more
>>>> visible on your 5.2.x, so please apply it there.
>>>>
>>> is there any reason to not apply that one on top of 4.19?
>>>
>>> Greets,
>>> Stefan
>>>
>>
>> It should work, cherrypicks fine without conflict here.
> 
> OK but does not work ;-)
> 
> 
> mm/compaction.c: In function '__compaction_suitable':
> mm/compaction.c:1451:19: error: implicit declaration of function
> 'zone_managed_pages'; did you mean 'node_spanned_pages'?
> [-Werror=implicit-function-declaration]
>       alloc_flags, zone_managed_pages(zone)))
>                    ^~~~~~~~~~~~~~~~~~
>                    node_spanned_pages

Ah, this?

----8<----
From f1335e1c0d4b74205fc0cc40b5960223d6f1dec7 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 12 Sep 2019 13:40:46 +0200
Subject: [PATCH] WIP

---
 include/linux/compaction.h     |  7 ++++++-
 include/trace/events/mmflags.h |  1 +
 mm/compaction.c                | 16 +++++++++++++--
 mm/vmscan.c                    | 36 ++++++++++++++++++++++++----------
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..2f3b331c5239 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -17,8 +17,13 @@ enum compact_priority {
 };
 
 /* Return values for compact_zone() and try_to_compact_pages() */
-/* When adding new states, please adjust include/trace/events/compaction.h */
+/* When adding new states, please adjust include/trace/events/mmflags.h */
 enum compact_result {
+	/*
+	 * The zone is too small to provide the requested allocation even if
+	 * fully freed (i.e. ZONE_DMA for THP allocation due to lowmem reserves)
+	 */
+	COMPACT_IMPOSSIBLE,
 	/* For more detailed tracepoint output - internal to compaction */
 	COMPACT_NOT_SUITABLE_ZONE,
 	/*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb76d89..d7aa9cece234 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -169,6 +169,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 
 #ifdef CONFIG_COMPACTION
 #define COMPACTION_STATUS					\
+	EM( COMPACT_IMPOSSIBLE,		"impossible")		\
 	EM( COMPACT_SKIPPED,		"skipped")		\
 	EM( COMPACT_DEFERRED,		"deferred")		\
 	EM( COMPACT_CONTINUE,		"continue")		\
diff --git a/mm/compaction.c b/mm/compaction.c
index 5079ddbec8f9..7d2299c7faa2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1416,6 +1416,7 @@ static enum compact_result compact_finished(struct zone *zone,
 /*
  * compaction_suitable: Is this suitable to run compaction on this zone now?
  * Returns
+ *   COMPACT_IMPOSSIBLE If the allocation would fail even with all pages free
  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
  *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
  *   COMPACT_CONTINUE - If compaction should run now
@@ -1439,6 +1440,16 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 								alloc_flags))
 		return COMPACT_SUCCESS;
 
+	/*
+	 * If the allocation would not succeed even with a fully free zone
+	 * due to e.g. lowmem reserves, indicate that compaction can't possibly
+	 * help and it would be pointless to reclaim.
+	 */
+	watermark += 1UL << order;
+	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
+				 alloc_flags, zone->managed_pages))
+		return COMPACT_IMPOSSIBLE;
+
 	/*
 	 * Watermarks for order-0 must be met for compaction to be able to
 	 * isolate free pages for migration targets. This means that the
@@ -1526,7 +1537,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 		compact_result = __compaction_suitable(zone, order, alloc_flags,
 				ac_classzone_idx(ac), available);
-		if (compact_result != COMPACT_SKIPPED)
+		if (compact_result > COMPACT_SKIPPED)
 			return true;
 	}
 
@@ -1555,7 +1566,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
 	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
 							cc->classzone_idx);
 	/* Compaction is likely to fail */
-	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
+	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED
+	    || ret == COMPACT_IMPOSSIBLE)
 		return ret;
 
 	/* huh, compaction_suitable is returning something unexpected */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b37610c0eac6..7ad331a64fc5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2849,11 +2849,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 }
 
 /*
- * Returns true if compaction should go ahead for a costly-order request, or
- * the allocation would already succeed without compaction. Return false if we
- * should reclaim first.
+ * Returns 1 if compaction should go ahead for a costly-order request, or the
+ * allocation would already succeed without compaction. Return 0 if we should
+ * reclaim first. Return -1 when compaction can't help at all due to zone being
+ * too small, which means there's no point in reclaim nor compaction.
  */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline int compaction_ready(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long watermark;
 	enum compact_result suitable;
@@ -2861,10 +2862,16 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
 	if (suitable == COMPACT_SUCCESS)
 		/* Allocation should succeed already. Don't reclaim. */
-		return true;
+		return 1;
 	if (suitable == COMPACT_SKIPPED)
 		/* Compaction cannot yet proceed. Do reclaim. */
-		return false;
+		return 0;
+	if (suitable == COMPACT_IMPOSSIBLE)
+		/*
+		 * Compaction can't possibly help. So don't reclaim, but keep
+		 * checking other zones.
+		 */
+		return -1;
 
 	/*
 	 * Compaction is already possible, but it takes time to run and there
@@ -2910,6 +2917,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					sc->reclaim_idx, sc->nodemask) {
+		int compact_ready;
 		/*
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
@@ -2929,10 +2937,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			 * page allocations.
 			 */
 			if (IS_ENABLED(CONFIG_COMPACTION) &&
-			    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-			    compaction_ready(zone, sc)) {
-				sc->compaction_ready = true;
-				continue;
+			    sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+				compact_ready = compaction_ready(zone, sc);
+				if (compact_ready == 1) {
+					sc->compaction_ready = true;
+					continue;
+				} else if (compact_ready == -1) {
+					/*
+					 * In this zone, neither reclaim nor
+					 * compaction can help.
+					 */
+					continue;
+				}
 			}
 
 			/*
-- 
2.23.0




  parent reply	other threads:[~2019-10-22 10:21 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-05 11:27 lot of MemAvailable but falling cache and raising PSI Stefan Priebe - Profihost AG
2019-09-05 11:40 ` Michal Hocko
2019-09-05 11:56   ` Stefan Priebe - Profihost AG
2019-09-05 16:28     ` Yang Shi
2019-09-05 17:26       ` Stefan Priebe - Profihost AG
2019-09-05 18:46         ` Yang Shi
2019-09-05 19:31           ` Stefan Priebe - Profihost AG
2019-09-06 10:08     ` Stefan Priebe - Profihost AG
2019-09-06 10:25       ` Vlastimil Babka
2019-09-06 18:52       ` Yang Shi
2019-09-07  7:32         ` Stefan Priebe - Profihost AG
2019-09-09  8:27       ` Michal Hocko
2019-09-09  8:54         ` Stefan Priebe - Profihost AG
2019-09-09 11:01           ` Michal Hocko
2019-09-09 12:08             ` Michal Hocko
2019-09-09 12:10               ` Stefan Priebe - Profihost AG
2019-09-09 12:28                 ` Michal Hocko
2019-09-09 12:37                   ` Stefan Priebe - Profihost AG
2019-09-09 12:49                     ` Michal Hocko
2019-09-09 12:56                       ` Stefan Priebe - Profihost AG
     [not found]                         ` <52235eda-ffe2-721c-7ad7-575048e2d29d@profihost.ag>
2019-09-10  5:58                           ` Stefan Priebe - Profihost AG
2019-09-10  8:29                           ` Michal Hocko
2019-09-10  8:38                             ` Stefan Priebe - Profihost AG
2019-09-10  9:02                               ` Michal Hocko
2019-09-10  9:37                                 ` Stefan Priebe - Profihost AG
2019-09-10 11:07                                   ` Michal Hocko
2019-09-10 12:45                                     ` Stefan Priebe - Profihost AG
2019-09-10 12:57                                       ` Michal Hocko
2019-09-10 13:05                                         ` Stefan Priebe - Profihost AG
2019-09-10 13:14                                           ` Stefan Priebe - Profihost AG
2019-09-10 13:24                                             ` Michal Hocko
2019-09-11  6:12                                               ` Stefan Priebe - Profihost AG
2019-09-11  6:24                                                 ` Stefan Priebe - Profihost AG
2019-09-11 13:59                                                   ` Stefan Priebe - Profihost AG
2019-09-12 10:53                                                     ` Stefan Priebe - Profihost AG
2019-09-12 11:06                                                       ` Stefan Priebe - Profihost AG
2019-09-11  7:09                                                 ` 5.3-rc-8 hung task in IO (was: Re: lot of MemAvailable but falling cache and raising PSI) Michal Hocko
2019-09-11 14:09                                                   ` Stefan Priebe - Profihost AG
2019-09-11 14:56                                                   ` Filipe Manana
2019-09-11 15:39                                                     ` Stefan Priebe - Profihost AG
2019-09-11 15:56                                                       ` Filipe Manana
2019-09-11 16:15                                                         ` Stefan Priebe - Profihost AG
2019-09-11 16:19                                                           ` Filipe Manana
2019-09-19 10:21                                                 ` lot of MemAvailable but falling cache and raising PSI Stefan Priebe - Profihost AG
2019-09-23 12:08                                                   ` Michal Hocko
2019-09-27 12:45                                                   ` Vlastimil Babka
2019-09-30  6:56                                                     ` Stefan Priebe - Profihost AG
2019-09-30  7:21                                                       ` Vlastimil Babka
2019-10-22  7:41                                                     ` Stefan Priebe - Profihost AG
2019-10-22  7:48                                                       ` Vlastimil Babka
2019-10-22 10:02                                                         ` Stefan Priebe - Profihost AG
2019-10-22 10:20                                                           ` Oscar Salvador
2019-10-22 10:21                                                           ` Vlastimil Babka [this message]
2019-10-22 11:08                                                             ` Stefan Priebe - Profihost AG
2019-09-10  5:41                       ` Stefan Priebe - Profihost AG
2019-09-09 11:49           ` Vlastimil Babka
2019-09-09 12:09             ` Stefan Priebe - Profihost AG
2019-09-09 12:21               ` Vlastimil Babka
2019-09-09 12:31                 ` Stefan Priebe - Profihost AG
2019-09-05 12:15 ` Vlastimil Babka
2019-09-05 12:27   ` Stefan Priebe - Profihost AG

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f533e547-996d-5a05-5b25-92862c8f9057@suse.cz \
    --to=vbabka@suse.cz \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=l.roehrs@profihost.ag \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=s.priebe@profihost.ag \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).