All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vlastimil Babka <vbabka@suse.cz>
To: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>,
	Michal Hocko <mhocko@kernel.org>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	l.roehrs@profihost.ag, cgroups@vger.kernel.org,
	Johannes Weiner <hannes@cmpxchg.org>
Subject: Re: lot of MemAvailable but falling cache and raising PSI
Date: Tue, 22 Oct 2019 12:21:24 +0200	[thread overview]
Message-ID: <f533e547-996d-5a05-5b25-92862c8f9057@suse.cz> (raw)
In-Reply-To: <1430bb64-ef9b-f6a1-fb2c-1ca351e7950e@profihost.ag>

On 10/22/19 12:02 PM, Stefan Priebe - Profihost AG wrote:
> 
> Am 22.10.19 um 09:48 schrieb Vlastimil Babka:
>> On 10/22/19 9:41 AM, Stefan Priebe - Profihost AG wrote:
>>>> Hi, could you try the patch below? I suspect you're hitting a corner
>>>> case where compaction_suitable() returns COMPACT_SKIPPED for the
>>>> ZONE_DMA, triggering reclaim even if other zones have plenty of free
>>>> memory. And should_continue_reclaim() then returns true until twice the
>>>> requested page size is reclaimed (compact_gap()). That means 4MB
>>>> reclaimed for each THP allocation attempt, which roughly matches the
>>>> trace data you provided previously.
>>>>
>>>> The amplification to 4MB should be removed in patches merged for 5.4, so
>>>> it would be only 32 pages reclaimed per THP allocation. The patch below
>>>> tries to remove this corner case completely, and it should be more
>>>> visible on your 5.2.x, so please apply it there.
>>>>
>>> is there any reason to not apply that one on top of 4.19?
>>>
>>> Greets,
>>> Stefan
>>>
>>
>> It should work, cherrypicks fine without conflict here.
> 
> OK but does not work ;-)
> 
> 
> mm/compaction.c: In function '__compaction_suitable':
> mm/compaction.c:1451:19: error: implicit declaration of function
> 'zone_managed_pages'; did you mean 'node_spanned_pages'?
> [-Werror=implicit-function-declaration]
>       alloc_flags, zone_managed_pages(zone)))
>                    ^~~~~~~~~~~~~~~~~~
>                    node_spanned_pages

Ah, this?

----8<----
From f1335e1c0d4b74205fc0cc40b5960223d6f1dec7 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 12 Sep 2019 13:40:46 +0200
Subject: [PATCH] WIP

---
 include/linux/compaction.h     |  7 ++++++-
 include/trace/events/mmflags.h |  1 +
 mm/compaction.c                | 16 +++++++++++++--
 mm/vmscan.c                    | 36 ++++++++++++++++++++++++----------
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..2f3b331c5239 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -17,8 +17,13 @@ enum compact_priority {
 };
 
 /* Return values for compact_zone() and try_to_compact_pages() */
-/* When adding new states, please adjust include/trace/events/compaction.h */
+/* When adding new states, please adjust include/trace/events/mmflags.h */
 enum compact_result {
+	/*
+	 * The zone is too small to provide the requested allocation even if
+	 * fully freed (e.g. ZONE_DMA for THP allocation due to lowmem reserves)
+	 */
+	COMPACT_IMPOSSIBLE,
 	/* For more detailed tracepoint output - internal to compaction */
 	COMPACT_NOT_SUITABLE_ZONE,
 	/*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb76d89..d7aa9cece234 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -169,6 +169,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 
 #ifdef CONFIG_COMPACTION
 #define COMPACTION_STATUS					\
+	EM( COMPACT_IMPOSSIBLE,		"impossible")		\
 	EM( COMPACT_SKIPPED,		"skipped")		\
 	EM( COMPACT_DEFERRED,		"deferred")		\
 	EM( COMPACT_CONTINUE,		"continue")		\
diff --git a/mm/compaction.c b/mm/compaction.c
index 5079ddbec8f9..7d2299c7faa2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1416,6 +1416,7 @@ static enum compact_result compact_finished(struct zone *zone,
 /*
  * compaction_suitable: Is this suitable to run compaction on this zone now?
  * Returns
+ *   COMPACT_IMPOSSIBLE - If the allocation would fail even with all pages free
  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
  *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
  *   COMPACT_CONTINUE - If compaction should run now
@@ -1439,6 +1440,16 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 								alloc_flags))
 		return COMPACT_SUCCESS;
 
+	/*
+	 * If the allocation would not succeed even with a fully free zone
+	 * due to e.g. lowmem reserves, indicate that compaction can't possibly
+	 * help and it would be pointless to reclaim.
+	 */
+	watermark += 1UL << order;
+	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
+				 alloc_flags, zone->managed_pages))
+		return COMPACT_IMPOSSIBLE;
+
 	/*
 	 * Watermarks for order-0 must be met for compaction to be able to
 	 * isolate free pages for migration targets. This means that the
@@ -1526,7 +1537,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 		compact_result = __compaction_suitable(zone, order, alloc_flags,
 				ac_classzone_idx(ac), available);
-		if (compact_result != COMPACT_SKIPPED)
+		if (compact_result > COMPACT_SKIPPED)
 			return true;
 	}
 
@@ -1555,7 +1566,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
 	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
 							cc->classzone_idx);
 	/* Compaction is likely to fail */
-	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
+	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED
+	    || ret == COMPACT_IMPOSSIBLE)
 		return ret;
 
 	/* huh, compaction_suitable is returning something unexpected */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b37610c0eac6..7ad331a64fc5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2849,11 +2849,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 }
 
 /*
- * Returns true if compaction should go ahead for a costly-order request, or
- * the allocation would already succeed without compaction. Return false if we
- * should reclaim first.
+ * Returns 1 if compaction should go ahead for a costly-order request, or the
+ * allocation would already succeed without compaction. Return 0 if we should
+ * reclaim first. Return -1 when compaction can't help at all due to zone being
+ * too small, which means there's no point in reclaim nor compaction.
  */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline int compaction_ready(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long watermark;
 	enum compact_result suitable;
@@ -2861,10 +2862,16 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
 	if (suitable == COMPACT_SUCCESS)
 		/* Allocation should succeed already. Don't reclaim. */
-		return true;
+		return 1;
 	if (suitable == COMPACT_SKIPPED)
 		/* Compaction cannot yet proceed. Do reclaim. */
-		return false;
+		return 0;
+	if (suitable == COMPACT_IMPOSSIBLE)
+		/*
+		 * Compaction can't possibly help. So don't reclaim, but keep
+		 * checking other zones.
+		 */
+		return -1;
 
 	/*
 	 * Compaction is already possible, but it takes time to run and there
@@ -2910,6 +2917,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					sc->reclaim_idx, sc->nodemask) {
+		int compact_ready;
 		/*
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
@@ -2929,10 +2937,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			 * page allocations.
 			 */
 			if (IS_ENABLED(CONFIG_COMPACTION) &&
-			    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-			    compaction_ready(zone, sc)) {
-				sc->compaction_ready = true;
-				continue;
+			    sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+				compact_ready = compaction_ready(zone, sc);
+				if (compact_ready == 1) {
+					sc->compaction_ready = true;
+					continue;
+				} else if (compact_ready == -1) {
+					/*
+					 * In this zone, neither reclaim nor
+					 * compaction can help.
+					 */
+					continue;
+				}
 			}
 
 			/*
-- 
2.23.0




  parent reply	other threads:[~2019-10-22 10:21 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-05 11:27 lot of MemAvailable but falling cache and raising PSI Stefan Priebe - Profihost AG
2019-09-05 11:40 ` Michal Hocko
2019-09-05 11:56   ` Stefan Priebe - Profihost AG
2019-09-05 16:28     ` Yang Shi
2019-09-05 17:26       ` Stefan Priebe - Profihost AG
2019-09-05 18:46         ` Yang Shi
2019-09-05 19:31           ` Stefan Priebe - Profihost AG
2019-09-06 10:08     ` Stefan Priebe - Profihost AG
2019-09-06 10:25       ` Vlastimil Babka
2019-09-06 18:52       ` Yang Shi
2019-09-07  7:32         ` Stefan Priebe - Profihost AG
2019-09-09  8:27       ` Michal Hocko
2019-09-09  8:54         ` Stefan Priebe - Profihost AG
2019-09-09 11:01           ` Michal Hocko
2019-09-09 12:08             ` Michal Hocko
2019-09-09 12:10               ` Stefan Priebe - Profihost AG
2019-09-09 12:28                 ` Michal Hocko
2019-09-09 12:37                   ` Stefan Priebe - Profihost AG
2019-09-09 12:49                     ` Michal Hocko
2019-09-09 12:56                       ` Stefan Priebe - Profihost AG
     [not found]                         ` <52235eda-ffe2-721c-7ad7-575048e2d29d@profihost.ag>
2019-09-10  5:58                           ` Stefan Priebe - Profihost AG
2019-09-10  8:29                           ` Michal Hocko
2019-09-10  8:38                             ` Stefan Priebe - Profihost AG
2019-09-10  9:02                               ` Michal Hocko
2019-09-10  9:37                                 ` Stefan Priebe - Profihost AG
2019-09-10 11:07                                   ` Michal Hocko
2019-09-10 12:45                                     ` Stefan Priebe - Profihost AG
2019-09-10 12:57                                       ` Michal Hocko
2019-09-10 13:05                                         ` Stefan Priebe - Profihost AG
2019-09-10 13:14                                           ` Stefan Priebe - Profihost AG
2019-09-10 13:24                                             ` Michal Hocko
2019-09-11  6:12                                               ` Stefan Priebe - Profihost AG
2019-09-11  6:24                                                 ` Stefan Priebe - Profihost AG
2019-09-11 13:59                                                   ` Stefan Priebe - Profihost AG
2019-09-12 10:53                                                     ` Stefan Priebe - Profihost AG
2019-09-12 11:06                                                       ` Stefan Priebe - Profihost AG
2019-09-11  7:09                                                 ` 5.3-rc-8 hung task in IO (was: Re: lot of MemAvailable but falling cache and raising PSI) Michal Hocko
2019-09-11 14:09                                                   ` Stefan Priebe - Profihost AG
2019-09-11 14:56                                                   ` Filipe Manana
2019-09-11 14:56                                                     ` Filipe Manana
2019-09-11 15:39                                                     ` Stefan Priebe - Profihost AG
2019-09-11 15:56                                                       ` Filipe Manana
2019-09-11 15:56                                                         ` Filipe Manana
2019-09-11 16:15                                                         ` Stefan Priebe - Profihost AG
2019-09-11 16:19                                                           ` Filipe Manana
2019-09-11 16:19                                                             ` Filipe Manana
2019-09-19 10:21                                                 ` lot of MemAvailable but falling cache and raising PSI Stefan Priebe - Profihost AG
2019-09-23 12:08                                                   ` Michal Hocko
2019-09-27 12:45                                                   ` Vlastimil Babka
2019-09-30  6:56                                                     ` Stefan Priebe - Profihost AG
2019-09-30  7:21                                                       ` Vlastimil Babka
2019-10-22  7:41                                                     ` Stefan Priebe - Profihost AG
2019-10-22  7:48                                                       ` Vlastimil Babka
2019-10-22 10:02                                                         ` Stefan Priebe - Profihost AG
2019-10-22 10:20                                                           ` Oscar Salvador
2019-10-22 10:21                                                           ` Vlastimil Babka [this message]
2019-10-22 11:08                                                             ` Stefan Priebe - Profihost AG
2019-09-10  5:41                       ` Stefan Priebe - Profihost AG
2019-09-09 11:49           ` Vlastimil Babka
2019-09-09 12:09             ` Stefan Priebe - Profihost AG
2019-09-09 12:21               ` Vlastimil Babka
2019-09-09 12:31                 ` Stefan Priebe - Profihost AG
2019-09-05 12:15 ` Vlastimil Babka
2019-09-05 12:27   ` Stefan Priebe - Profihost AG

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f533e547-996d-5a05-5b25-92862c8f9057@suse.cz \
    --to=vbabka@suse.cz \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=l.roehrs@profihost.ag \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=s.priebe@profihost.ag \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.