* [PATCH 0/3] mm, oom: add oom detection tracepoints
@ 2016-12-14 14:53 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML
Hi,
this is a long overdue and I am really sorry about that. I just didn't
get to sit and come up with this earlier as there was always some
going on which preempted it. This patchset adds two tracepoints which
should help us to debug oom decision making. The first one is placed
in should_reclaim_retry and it tells us why do we keep retrying the
allocation and reclaim while the second is in should_compact_retry which
tells us the similar for the high order requests.
In combination with the existing compaction and reclaim tracepoints we
can draw a much better picture about what is going on and why we go
and declare the oom.
I am not really a tracepoint guy so I hope I didn't do anything
obviously stupid there. Thanks to Vlastimil for his help before I've
posted this.
Anywa feedback is of course welcome!
Michal Hocko (3):
mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
oom, trace: Add oom detection tracepoints
oom, trace: add compaction retry tracepoint
include/trace/events/compaction.h | 56 ------------------------
include/trace/events/mmflags.h | 90 +++++++++++++++++++++++++++++++++++++++
include/trace/events/oom.h | 81 +++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 32 ++++++++++----
4 files changed, 195 insertions(+), 64 deletions(-)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 14:53 ` Michal Hocko
-1 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
COMPACTION_STATUS resp. ZONE_TYPE are currently used to translate enum
compact_result resp. struct zone index into their symbolic names for
an easier post processing. The follow up patch would like to reuse
this as well. The code involves some preprocessor black magic which is
better not duplicated elsewhere so move it to a common mm tracing relate
header.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/compaction.h | 56 ----------------------------------
include/trace/events/mmflags.h | 64 +++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index cbdb90b6b308..2334faa56323 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -9,62 +9,6 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
-#define COMPACTION_STATUS \
- EM( COMPACT_SKIPPED, "skipped") \
- EM( COMPACT_DEFERRED, "deferred") \
- EM( COMPACT_CONTINUE, "continue") \
- EM( COMPACT_SUCCESS, "success") \
- EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
- EM( COMPACT_COMPLETE, "complete") \
- EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
- EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
- EMe(COMPACT_CONTENDED, "contended")
-
-#ifdef CONFIG_ZONE_DMA
-#define IFDEF_ZONE_DMA(X) X
-#else
-#define IFDEF_ZONE_DMA(X)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define IFDEF_ZONE_DMA32(X) X
-#else
-#define IFDEF_ZONE_DMA32(X)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define IFDEF_ZONE_HIGHMEM(X) X
-#else
-#define IFDEF_ZONE_HIGHMEM(X)
-#endif
-
-#define ZONE_TYPE \
- IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
- IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
- EM (ZONE_NORMAL, "Normal") \
- IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
- EMe(ZONE_MOVABLE,"Movable")
-
-/*
- * First define the enums in the above macros to be exported to userspace
- * via TRACE_DEFINE_ENUM().
- */
-#undef EM
-#undef EMe
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define EMe(a, b) TRACE_DEFINE_ENUM(a);
-
-COMPACTION_STATUS
-ZONE_TYPE
-
-/*
- * Now redefine the EM() and EMe() macros to map the enums to the strings
- * that will be printed in the output.
- */
-#undef EM
-#undef EMe
-#define EM(a, b) {a, b},
-#define EMe(a, b) {a, b}
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5a81ab48a2fb..7e4cfede873c 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -1,3 +1,6 @@
+#include <linux/node.h>
+#include <linux/mmzone.h>
+#include <linux/compaction.h>
/*
* The order of these masks is important. Matching masks will be seen
* first and the left over flags will end up showing by themselves.
@@ -172,3 +175,64 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
(flags) ? __print_flags(flags, "|", \
__def_vmaflag_names \
) : "none"
+
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_STATUS \
+ EM( COMPACT_SKIPPED, "skipped") \
+ EM( COMPACT_DEFERRED, "deferred") \
+ EM( COMPACT_CONTINUE, "continue") \
+ EM( COMPACT_SUCCESS, "success") \
+ EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
+ EM( COMPACT_COMPLETE, "complete") \
+ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
+ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
+ EMe(COMPACT_CONTENDED, "contended")
+#else
+#define COMPACTION_STATUS
+#endif
+
+#ifdef CONFIG_ZONE_DMA
+#define IFDEF_ZONE_DMA(X) X
+#else
+#define IFDEF_ZONE_DMA(X)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define IFDEF_ZONE_DMA32(X) X
+#else
+#define IFDEF_ZONE_DMA32(X)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define IFDEF_ZONE_HIGHMEM(X) X
+#else
+#define IFDEF_ZONE_HIGHMEM(X)
+#endif
+
+#define ZONE_TYPE \
+ IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
+ IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
+ EM (ZONE_NORMAL, "Normal") \
+ IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
+ EMe(ZONE_MOVABLE,"Movable")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+COMPACTION_STATUS
+ZONE_TYPE
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
--
2.10.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
@ 2016-12-14 14:53 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
COMPACTION_STATUS resp. ZONE_TYPE are currently used to translate enum
compact_result resp. struct zone index into their symbolic names for
an easier post processing. The follow up patch would like to reuse
this as well. The code involves some preprocessor black magic which is
better not duplicated elsewhere so move it to a common mm tracing relate
header.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/compaction.h | 56 ----------------------------------
include/trace/events/mmflags.h | 64 +++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index cbdb90b6b308..2334faa56323 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -9,62 +9,6 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
-#define COMPACTION_STATUS \
- EM( COMPACT_SKIPPED, "skipped") \
- EM( COMPACT_DEFERRED, "deferred") \
- EM( COMPACT_CONTINUE, "continue") \
- EM( COMPACT_SUCCESS, "success") \
- EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
- EM( COMPACT_COMPLETE, "complete") \
- EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
- EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
- EMe(COMPACT_CONTENDED, "contended")
-
-#ifdef CONFIG_ZONE_DMA
-#define IFDEF_ZONE_DMA(X) X
-#else
-#define IFDEF_ZONE_DMA(X)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define IFDEF_ZONE_DMA32(X) X
-#else
-#define IFDEF_ZONE_DMA32(X)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define IFDEF_ZONE_HIGHMEM(X) X
-#else
-#define IFDEF_ZONE_HIGHMEM(X)
-#endif
-
-#define ZONE_TYPE \
- IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
- IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
- EM (ZONE_NORMAL, "Normal") \
- IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
- EMe(ZONE_MOVABLE,"Movable")
-
-/*
- * First define the enums in the above macros to be exported to userspace
- * via TRACE_DEFINE_ENUM().
- */
-#undef EM
-#undef EMe
-#define EM(a, b) TRACE_DEFINE_ENUM(a);
-#define EMe(a, b) TRACE_DEFINE_ENUM(a);
-
-COMPACTION_STATUS
-ZONE_TYPE
-
-/*
- * Now redefine the EM() and EMe() macros to map the enums to the strings
- * that will be printed in the output.
- */
-#undef EM
-#undef EMe
-#define EM(a, b) {a, b},
-#define EMe(a, b) {a, b}
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5a81ab48a2fb..7e4cfede873c 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -1,3 +1,6 @@
+#include <linux/node.h>
+#include <linux/mmzone.h>
+#include <linux/compaction.h>
/*
* The order of these masks is important. Matching masks will be seen
* first and the left over flags will end up showing by themselves.
@@ -172,3 +175,64 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
(flags) ? __print_flags(flags, "|", \
__def_vmaflag_names \
) : "none"
+
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_STATUS \
+ EM( COMPACT_SKIPPED, "skipped") \
+ EM( COMPACT_DEFERRED, "deferred") \
+ EM( COMPACT_CONTINUE, "continue") \
+ EM( COMPACT_SUCCESS, "success") \
+ EM( COMPACT_PARTIAL_SKIPPED, "partial_skipped") \
+ EM( COMPACT_COMPLETE, "complete") \
+ EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
+ EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
+ EMe(COMPACT_CONTENDED, "contended")
+#else
+#define COMPACTION_STATUS
+#endif
+
+#ifdef CONFIG_ZONE_DMA
+#define IFDEF_ZONE_DMA(X) X
+#else
+#define IFDEF_ZONE_DMA(X)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define IFDEF_ZONE_DMA32(X) X
+#else
+#define IFDEF_ZONE_DMA32(X)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define IFDEF_ZONE_HIGHMEM(X) X
+#else
+#define IFDEF_ZONE_HIGHMEM(X)
+#endif
+
+#define ZONE_TYPE \
+ IFDEF_ZONE_DMA( EM (ZONE_DMA, "DMA")) \
+ IFDEF_ZONE_DMA32( EM (ZONE_DMA32, "DMA32")) \
+ EM (ZONE_NORMAL, "Normal") \
+ IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem")) \
+ EMe(ZONE_MOVABLE,"Movable")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+COMPACTION_STATUS
+ZONE_TYPE
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
--
2.10.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 17:14 ` Vlastimil Babka
-1 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:14 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
>
> COMPACTION_STATUS resp. ZONE_TYPE are currently used to translate enum
> compact_result resp. struct zone index into their symbolic names for
> an easier post processing. The follow up patch would like to reuse
> this as well. The code involves some preprocessor black magic which is
> better not duplicated elsewhere so move it to a common mm tracing relate
> header.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
@ 2016-12-14 17:14 ` Vlastimil Babka
0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:14 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
>
> COMPACTION_STATUS resp. ZONE_TYPE are currently used to translate enum
> compact_result resp. struct zone index into their symbolic names for
> an easier post processing. The follow up patch would like to reuse
> this as well. The code involves some preprocessor black magic which is
> better not duplicated elsewhere so move it to a common mm tracing relate
> header.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
2016-12-14 14:53 ` Michal Hocko
(?)
(?)
@ 2016-12-14 17:32 ` kbuild test robot
2016-12-14 19:02 ` Michal Hocko
-1 siblings, 1 reply; 23+ messages in thread
From: kbuild test robot @ 2016-12-14 17:32 UTC (permalink / raw)
To: Michal Hocko
Cc: kbuild-all, Andrew Morton, Vlastimil Babka, David Rientjes,
Johannes Weiner, linux-mm, LKML, Michal Hocko
[-- Attachment #1: Type: text/plain, Size: 6537 bytes --]
Hi Michal,
[auto build test ERROR on tip/perf/core]
[also build test ERROR on v4.9 next-20161214]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Michal-Hocko/mm-oom-add-oom-detection-tracepoints/20161214-231225
config: x86_64-randconfig-s2-12142134 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All error/warnings (new ones prefixed by >>):
In file included from include/trace/trace_events.h:361,
from include/trace/define_trace.h:95,
from include/trace/events/compaction.h:356,
from mm/compaction.c:43:
include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_end':
>> include/trace/events/compaction.h:134: error: expected expression before ',' token
include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_suitable_template':
include/trace/events/compaction.h:195: error: expected expression before ',' token
>> include/trace/events/compaction.h:195: warning: missing braces around initializer
include/trace/events/compaction.h:195: warning: (near initialization for 'symbols[0]')
>> include/trace/events/compaction.h:195: error: initializer element is not constant
include/trace/events/compaction.h:195: error: (near initialization for 'symbols[0].mask')
vim +134 include/trace/events/compaction.h
16c4a097 Joonsoo Kim 2015-02-11 128 __entry->migrate_pfn,
16c4a097 Joonsoo Kim 2015-02-11 129 __entry->free_pfn,
16c4a097 Joonsoo Kim 2015-02-11 130 __entry->zone_end,
16c4a097 Joonsoo Kim 2015-02-11 131 __entry->sync ? "sync" : "async")
0eb927c0 Mel Gorman 2014-01-21 132 );
0eb927c0 Mel Gorman 2014-01-21 133
0eb927c0 Mel Gorman 2014-01-21 @134 TRACE_EVENT(mm_compaction_end,
16c4a097 Joonsoo Kim 2015-02-11 135 TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
16c4a097 Joonsoo Kim 2015-02-11 136 unsigned long free_pfn, unsigned long zone_end, bool sync,
16c4a097 Joonsoo Kim 2015-02-11 137 int status),
0eb927c0 Mel Gorman 2014-01-21 138
16c4a097 Joonsoo Kim 2015-02-11 139 TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status),
0eb927c0 Mel Gorman 2014-01-21 140
0eb927c0 Mel Gorman 2014-01-21 141 TP_STRUCT__entry(
16c4a097 Joonsoo Kim 2015-02-11 142 __field(unsigned long, zone_start)
16c4a097 Joonsoo Kim 2015-02-11 143 __field(unsigned long, migrate_pfn)
16c4a097 Joonsoo Kim 2015-02-11 144 __field(unsigned long, free_pfn)
16c4a097 Joonsoo Kim 2015-02-11 145 __field(unsigned long, zone_end)
16c4a097 Joonsoo Kim 2015-02-11 146 __field(bool, sync)
0eb927c0 Mel Gorman 2014-01-21 147 __field(int, status)
0eb927c0 Mel Gorman 2014-01-21 148 ),
0eb927c0 Mel Gorman 2014-01-21 149
0eb927c0 Mel Gorman 2014-01-21 150 TP_fast_assign(
16c4a097 Joonsoo Kim 2015-02-11 151 __entry->zone_start = zone_start;
16c4a097 Joonsoo Kim 2015-02-11 152 __entry->migrate_pfn = migrate_pfn;
16c4a097 Joonsoo Kim 2015-02-11 153 __entry->free_pfn = free_pfn;
16c4a097 Joonsoo Kim 2015-02-11 154 __entry->zone_end = zone_end;
16c4a097 Joonsoo Kim 2015-02-11 155 __entry->sync = sync;
0eb927c0 Mel Gorman 2014-01-21 156 __entry->status = status;
0eb927c0 Mel Gorman 2014-01-21 157 ),
0eb927c0 Mel Gorman 2014-01-21 158
16c4a097 Joonsoo Kim 2015-02-11 159 TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s status=%s",
16c4a097 Joonsoo Kim 2015-02-11 160 __entry->zone_start,
16c4a097 Joonsoo Kim 2015-02-11 161 __entry->migrate_pfn,
16c4a097 Joonsoo Kim 2015-02-11 162 __entry->free_pfn,
16c4a097 Joonsoo Kim 2015-02-11 163 __entry->zone_end,
16c4a097 Joonsoo Kim 2015-02-11 164 __entry->sync ? "sync" : "async",
fa6c7b46 Vlastimil Babka 2015-11-05 165 __print_symbolic(__entry->status, COMPACTION_STATUS))
0eb927c0 Mel Gorman 2014-01-21 166 );
b7aba698 Mel Gorman 2011-01-13 167
837d026d Joonsoo Kim 2015-02-11 168 TRACE_EVENT(mm_compaction_try_to_compact_pages,
837d026d Joonsoo Kim 2015-02-11 169
837d026d Joonsoo Kim 2015-02-11 170 TP_PROTO(
837d026d Joonsoo Kim 2015-02-11 171 int order,
837d026d Joonsoo Kim 2015-02-11 172 gfp_t gfp_mask,
a5508cd8 Vlastimil Babka 2016-07-28 173 int prio),
837d026d Joonsoo Kim 2015-02-11 174
a5508cd8 Vlastimil Babka 2016-07-28 175 TP_ARGS(order, gfp_mask, prio),
837d026d Joonsoo Kim 2015-02-11 176
837d026d Joonsoo Kim 2015-02-11 177 TP_STRUCT__entry(
837d026d Joonsoo Kim 2015-02-11 178 __field(int, order)
837d026d Joonsoo Kim 2015-02-11 179 __field(gfp_t, gfp_mask)
a5508cd8 Vlastimil Babka 2016-07-28 180 __field(int, prio)
837d026d Joonsoo Kim 2015-02-11 181 ),
837d026d Joonsoo Kim 2015-02-11 182
837d026d Joonsoo Kim 2015-02-11 183 TP_fast_assign(
837d026d Joonsoo Kim 2015-02-11 184 __entry->order = order;
837d026d Joonsoo Kim 2015-02-11 185 __entry->gfp_mask = gfp_mask;
a5508cd8 Vlastimil Babka 2016-07-28 186 __entry->prio = prio;
837d026d Joonsoo Kim 2015-02-11 187 ),
837d026d Joonsoo Kim 2015-02-11 188
a5508cd8 Vlastimil Babka 2016-07-28 189 TP_printk("order=%d gfp_mask=0x%x priority=%d",
837d026d Joonsoo Kim 2015-02-11 190 __entry->order,
837d026d Joonsoo Kim 2015-02-11 191 __entry->gfp_mask,
a5508cd8 Vlastimil Babka 2016-07-28 192 __entry->prio)
837d026d Joonsoo Kim 2015-02-11 193 );
837d026d Joonsoo Kim 2015-02-11 194
837d026d Joonsoo Kim 2015-02-11 @195 DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
837d026d Joonsoo Kim 2015-02-11 196
837d026d Joonsoo Kim 2015-02-11 197 TP_PROTO(struct zone *zone,
837d026d Joonsoo Kim 2015-02-11 198 int order,
:::::: The code at line 134 was first introduced by commit
:::::: 0eb927c0ab789d3d7d69f68acb850f69d4e7c36f mm: compaction: trace compaction begin and end
:::::: TO: Mel Gorman <mgorman@suse.de>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25580 bytes --]
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
2016-12-14 17:32 ` kbuild test robot
@ 2016-12-14 19:02 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 19:02 UTC (permalink / raw)
To: kbuild test robot
Cc: kbuild-all, Andrew Morton, Vlastimil Babka, David Rientjes,
Johannes Weiner, linux-mm, LKML
On Thu 15-12-16 01:32:06, kbuild test robot wrote:
> Hi Michal,
>
> [auto build test ERROR on tip/perf/core]
> [also build test ERROR on v4.9 next-20161214]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url: https://github.com/0day-ci/linux/commits/Michal-Hocko/mm-oom-add-oom-detection-tracepoints/20161214-231225
> config: x86_64-randconfig-s2-12142134 (attached as .config)
> compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64
>
> All error/warnings (new ones prefixed by >>):
>
> In file included from include/trace/trace_events.h:361,
> from include/trace/define_trace.h:95,
> from include/trace/events/compaction.h:356,
> from mm/compaction.c:43:
> include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_end':
> >> include/trace/events/compaction.h:134: error: expected expression before ',' token
> include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_suitable_template':
> include/trace/events/compaction.h:195: error: expected expression before ',' token
> >> include/trace/events/compaction.h:195: warning: missing braces around initializer
> include/trace/events/compaction.h:195: warning: (near initialization for 'symbols[0]')
> >> include/trace/events/compaction.h:195: error: initializer element is not constant
> include/trace/events/compaction.h:195: error: (near initialization for 'symbols[0].mask')
Interesting. I am pretty sure that my config battery has
CONFIG_COMPACTION=n. Not sure which part of your config made a change.
Anyway, I've added to my collection. And with the below diff it passes
all my configs.
---
>From 921bf07b8684ded5f076904cc6baa875b52c3a1e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 14 Dec 2016 18:56:44 +0100
Subject: [PATCH] fold me "mm, trace: extract COMPACTION_STATUS and ZONE_TYPE
to a common header"
0-day has reported:
In file included from include/trace/trace_events.h:361,
from include/trace/define_trace.h:95,
from include/trace/events/compaction.h:356,
from mm/compaction.c:43:
include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_end':
>> include/trace/events/compaction.h:134: error: expected expression before ',' token
include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_suitable_template':
include/trace/events/compaction.h:195: error: expected expression before ',' token
>> include/trace/events/compaction.h:195: warning: missing braces around initializer
include/trace/events/compaction.h:195: warning: (near initialization for 'symbols[0]')
>> include/trace/events/compaction.h:195: error: initializer element is not constant
include/trace/events/compaction.h:195: error: (near initialization for 'symbols[0].mask')
CONFIG_COMPACTION=n so COMPACTION_STATUS is not defined properly.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/compaction.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 2334faa56323..0a18ab6483ff 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -131,6 +131,7 @@ TRACE_EVENT(mm_compaction_begin,
__entry->sync ? "sync" : "async")
);
+#ifdef CONFIG_COMPACTION
TRACE_EVENT(mm_compaction_end,
TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
unsigned long free_pfn, unsigned long zone_end, bool sync,
@@ -164,6 +165,7 @@ TRACE_EVENT(mm_compaction_end,
__entry->sync ? "sync" : "async",
__print_symbolic(__entry->status, COMPACTION_STATUS))
);
+#endif
TRACE_EVENT(mm_compaction_try_to_compact_pages,
@@ -192,6 +194,7 @@ TRACE_EVENT(mm_compaction_try_to_compact_pages,
__entry->prio)
);
+#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
TP_PROTO(struct zone *zone,
@@ -239,7 +242,6 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
TP_ARGS(zone, order, ret)
);
-#ifdef CONFIG_COMPACTION
DECLARE_EVENT_CLASS(mm_compaction_defer_template,
TP_PROTO(struct zone *zone, int order),
--
2.10.2
--
Michal Hocko
SUSE Labs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 1/3] mm, trace: extract COMPACTION_STATUS and ZONE_TYPE to a common header
@ 2016-12-14 19:02 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 19:02 UTC (permalink / raw)
To: kbuild test robot
Cc: kbuild-all, Andrew Morton, Vlastimil Babka, David Rientjes,
Johannes Weiner, linux-mm, LKML
On Thu 15-12-16 01:32:06, kbuild test robot wrote:
> Hi Michal,
>
> [auto build test ERROR on tip/perf/core]
> [also build test ERROR on v4.9 next-20161214]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url: https://github.com/0day-ci/linux/commits/Michal-Hocko/mm-oom-add-oom-detection-tracepoints/20161214-231225
> config: x86_64-randconfig-s2-12142134 (attached as .config)
> compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64
>
> All error/warnings (new ones prefixed by >>):
>
> In file included from include/trace/trace_events.h:361,
> from include/trace/define_trace.h:95,
> from include/trace/events/compaction.h:356,
> from mm/compaction.c:43:
> include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_end':
> >> include/trace/events/compaction.h:134: error: expected expression before ',' token
> include/trace/events/compaction.h: In function 'trace_raw_output_mm_compaction_suitable_template':
> include/trace/events/compaction.h:195: error: expected expression before ',' token
> >> include/trace/events/compaction.h:195: warning: missing braces around initializer
> include/trace/events/compaction.h:195: warning: (near initialization for 'symbols[0]')
> >> include/trace/events/compaction.h:195: error: initializer element is not constant
> include/trace/events/compaction.h:195: error: (near initialization for 'symbols[0].mask')
Interesting. I am pretty sure that my config battery has
CONFIG_COMPACTION=n. Not sure which part of your config made a change.
Anyway, I've added to my collection. And with the below diff it passes
all my configs.
---
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/3] oom, trace: Add oom detection tracepoints
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 14:53 ` Michal Hocko
-1 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
should_reclaim_retry is the central decision point for declaring the
OOM. It might be really useful to expose data used for this decision
making when debugging an unexpected oom situations.
Say we have an OOM report:
[ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
[ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
Now we can check the tracepoint data to see how we have ended up in this
situation:
mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>From the above we can quickly deduce that the reclaim stopped making
any progress (see no_progress_loops increased in each round) and while
there were still some 51 reclaimable pages they couldn't be dropped
for some reason (vmscan trace points would tell us more about that
part). available will represent reclaimable + free_pages scaled down per
no_progress_loops factor. This is essentially an optimistic estimate of
how much memory we would have when reclaiming everything. This can be
compared to min_wmark to get a rought idea but the wmark_check tells the
result of the watermark check which is more precise (includes lowmem
reserves, considers the order etc.). As we can see no zone is eligible
in the end and that is why we have triggered the oom in this situation.
Please note that higher order requests might fail on the wmark_check even
when there is much more memory available than min_wmark - e.g. when the
memory is fragmented. A follow up tracepoint will help to debug those
situations.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/oom.h | 42 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 10 ++++++++--
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..9160da7a26a0 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,47 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f2c9e535f7f..23ca951a8380 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,7 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
@@ -3472,6 +3473,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3482,8 +3485,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
--
2.10.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 2/3] oom, trace: Add oom detection tracepoints
@ 2016-12-14 14:53 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
should_reclaim_retry is the central decision point for declaring the
OOM. It might be really useful to expose data used for this decision
making when debugging an unexpected oom situations.
Say we have an OOM report:
[ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
[ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
Now we can check the tracepoint data to see how we have ended up in this
situation:
mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>From the above we can quickly deduce that the reclaim stopped making
any progress (see no_progress_loops increased in each round) and while
there were still some 51 reclaimable pages they couldn't be dropped
for some reason (vmscan trace points would tell us more about that
part). available will represent reclaimable + free_pages scaled down per
no_progress_loops factor. This is essentially an optimistic estimate of
how much memory we would have when reclaiming everything. This can be
compared to min_wmark to get a rought idea but the wmark_check tells the
result of the watermark check which is more precise (includes lowmem
reserves, considers the order etc.). As we can see no zone is eligible
in the end and that is why we have triggered the oom in this situation.
Please note that higher order requests might fail on the wmark_check even
when there is much more memory available than min_wmark - e.g. when the
memory is fragmented. A follow up tracepoint will help to debug those
situations.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/oom.h | 42 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 10 ++++++++--
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..9160da7a26a0 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,47 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f2c9e535f7f..23ca951a8380 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,7 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
@@ -3472,6 +3473,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3482,8 +3485,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
--
2.10.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 2/3] oom, trace: Add oom detection tracepoints
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 17:17 ` Vlastimil Babka
-1 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:17 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
I guess the Subject should be more specific to the tracepoint?
> should_reclaim_retry is the central decision point for declaring the
> OOM. It might be really useful to expose data used for this decision
> making when debugging an unexpected oom situations.
>
> Say we have an OOM report:
> [ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
> [ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
>
> Now we can check the tracepoint data to see how we have ended up in this
> situation:
> mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
> mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
> mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
> mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
> mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
> mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
> mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
> mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
> mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>
> From the above we can quickly deduce that the reclaim stopped making
> any progress (see no_progress_loops increased in each round) and while
> there were still some 51 reclaimable pages they couldn't be dropped
> for some reason (vmscan trace points would tell us more about that
> part). available will represent reclaimable + free_pages scaled down per
> no_progress_loops factor. This is essentially an optimistic estimate of
> how much memory we would have when reclaiming everything. This can be
> compared to min_wmark to get a rought idea but the wmark_check tells the
> result of the watermark check which is more precise (includes lowmem
> reserves, considers the order etc.). As we can see no zone is eligible
> in the end and that is why we have triggered the oom in this situation.
>
> Please note that higher order requests might fail on the wmark_check even
> when there is much more memory available than min_wmark - e.g. when the
> memory is fragmented. A follow up tracepoint will help to debug those
> situations.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 2/3] oom, trace: Add oom detection tracepoints
@ 2016-12-14 17:17 ` Vlastimil Babka
0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:17 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
I guess the Subject should be more specific to the tracepoint?
> should_reclaim_retry is the central decision point for declaring the
> OOM. It might be really useful to expose data used for this decision
> making when debugging an unexpected oom situations.
>
> Say we have an OOM report:
> [ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
> [ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
>
> Now we can check the tracepoint data to see how we have ended up in this
> situation:
> mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
> mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
> mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
> mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
> mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
> mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
> mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
> mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
> mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>
> From the above we can quickly deduce that the reclaim stopped making
> any progress (see no_progress_loops increased in each round) and while
> there were still some 51 reclaimable pages they couldn't be dropped
> for some reason (vmscan trace points would tell us more about that
> part). available will represent reclaimable + free_pages scaled down per
> no_progress_loops factor. This is essentially an optimistic estimate of
> how much memory we would have when reclaiming everything. This can be
> compared to min_wmark to get a rought idea but the wmark_check tells the
> result of the watermark check which is more precise (includes lowmem
> reserves, considers the order etc.). As we can see no zone is eligible
> in the end and that is why we have triggered the oom in this situation.
>
> Please note that higher order requests might fail on the wmark_check even
> when there is much more memory available than min_wmark - e.g. when the
> memory is fragmented. A follow up tracepoint will help to debug those
> situations.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 3/3] oom, trace: add compaction retry tracepoint
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 14:53 ` Michal Hocko
-1 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
Higher order requests oom debugging is currently quite hard. We do have
some compaction points which can tell us how the compaction is operating
but there is no trace point to tell us about compaction retry logic.
This patch adds a one which will have the following format
bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
we can see that the order 9 request is not retried even though we are in
the highest compaction priority mode becase the last compaction attempt
was withdrawn. This means that compaction_zonelist_suitable must have
returned false and there is no suitable zone to compact for this request
and so no need to retry further.
another example would be
<...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
in this case the order-9 compaction failed to find any suitable
block. We do not retry anymore because this is a costly request
and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 22 ++++++++++++++++------
3 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 7e4cfede873c..aa4caa6914a9 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
EMe(COMPACT_CONTENDED, "contended")
+
+/* High-level compaction status feedback */
+#define COMPACTION_FAILED 1
+#define COMPACTION_WITHDRAWN 2
+#define COMPACTION_PROGRESS 3
+
+#define compact_result_to_feedback(result) \
+({ \
+ enum compact_result __result = result; \
+ (compaction_failed(__result)) ? COMPACTION_FAILED : \
+ (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
+})
+
+#define COMPACTION_FEEDBACK \
+ EM(COMPACTION_FAILED, "failed") \
+ EM(COMPACTION_WITHDRAWN, "withdrawn") \
+ EMe(COMPACTION_PROGRESS, "progress")
+
+#define COMPACTION_PRIORITY \
+ EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
+ EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
+ EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
#else
#define COMPACTION_STATUS
+#define COMPACTION_PRIORITY
+#define COMPACTION_FEEDBACK
#endif
#ifdef CONFIG_ZONE_DMA
@@ -226,6 +250,8 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
COMPACTION_STATUS
+COMPACTION_PRIORITY
+COMPACTION_FEEDBACK
ZONE_TYPE
/*
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 9160da7a26a0..e9d690665b7a 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -69,6 +69,45 @@ TRACE_EVENT(reclaim_retry_zone,
__entry->no_progress_loops,
__entry->wmark_check)
);
+
+#ifdef CONFIG_COMPACTION
+TRACE_EVENT(compact_retry,
+
+ TP_PROTO(int order,
+ enum compact_priority priority,
+ enum compact_result result,
+ int retries,
+ int max_retries,
+ bool ret),
+
+ TP_ARGS(order, priority, result, retries, max_retries, ret),
+
+ TP_STRUCT__entry(
+ __field( int, order)
+ __field( int, priority)
+ __field( int, result)
+ __field( int, retries)
+ __field( int, max_retries)
+ __field( bool, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->priority = priority;
+ __entry->result = result;
+ __entry->retries = retries;
+ __entry->max_retries = max_retries;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
+ __entry->order,
+ __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
+ __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
+ __entry->retries, __entry->max_retries,
+ __entry->ret)
+);
+#endif /* CONFIG_COMPACTION */
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23ca951a8380..150245328882 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3201,6 +3201,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
{
int max_retries = MAX_COMPACT_RETRIES;
int min_priority;
+ bool ret = false;
+ int retries = *compaction_retries;
+ enum compact_priority priority = *compact_priority;
if (!order)
return false;
@@ -3222,8 +3225,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
* But do not retry if the given zonelist is not suitable for
* compaction.
*/
- if (compaction_withdrawn(compact_result))
- return compaction_zonelist_suitable(ac, order, alloc_flags);
+ if (compaction_withdrawn(compact_result)) {
+ ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+ goto out;
+ }
/*
* !costly requests are much more important than __GFP_REPEAT
@@ -3235,8 +3240,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
*/
if (order > PAGE_ALLOC_COSTLY_ORDER)
max_retries /= 4;
- if (*compaction_retries <= max_retries)
- return true;
+ if (*compaction_retries <= max_retries) {
+ ret = true;
+ goto out;
+ }
/*
* Make sure there are attempts at the highest priority if we exhausted
@@ -3245,12 +3252,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
check_priority:
min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+
if (*compact_priority > min_priority) {
(*compact_priority)--;
*compaction_retries = 0;
- return true;
+ ret = true;
}
- return false;
+out:
+ trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
+ return ret;
}
#else
static inline struct page *
--
2.10.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 3/3] oom, trace: add compaction retry tracepoint
@ 2016-12-14 14:53 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 14:53 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
Higher order requests oom debugging is currently quite hard. We do have
some compaction points which can tell us how the compaction is operating
but there is no trace point to tell us about compaction retry logic.
This patch adds a one which will have the following format
bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
we can see that the order 9 request is not retried even though we are in
the highest compaction priority mode becase the last compaction attempt
was withdrawn. This means that compaction_zonelist_suitable must have
returned false and there is no suitable zone to compact for this request
and so no need to retry further.
another example would be
<...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
in this case the order-9 compaction failed to find any suitable
block. We do not retry anymore because this is a costly request
and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 22 ++++++++++++++++------
3 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 7e4cfede873c..aa4caa6914a9 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
EMe(COMPACT_CONTENDED, "contended")
+
+/* High-level compaction status feedback */
+#define COMPACTION_FAILED 1
+#define COMPACTION_WITHDRAWN 2
+#define COMPACTION_PROGRESS 3
+
+#define compact_result_to_feedback(result) \
+({ \
+ enum compact_result __result = result; \
+ (compaction_failed(__result)) ? COMPACTION_FAILED : \
+ (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
+})
+
+#define COMPACTION_FEEDBACK \
+ EM(COMPACTION_FAILED, "failed") \
+ EM(COMPACTION_WITHDRAWN, "withdrawn") \
+ EMe(COMPACTION_PROGRESS, "progress")
+
+#define COMPACTION_PRIORITY \
+ EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
+ EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
+ EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
#else
#define COMPACTION_STATUS
+#define COMPACTION_PRIORITY
+#define COMPACTION_FEEDBACK
#endif
#ifdef CONFIG_ZONE_DMA
@@ -226,6 +250,8 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
COMPACTION_STATUS
+COMPACTION_PRIORITY
+COMPACTION_FEEDBACK
ZONE_TYPE
/*
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 9160da7a26a0..e9d690665b7a 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -69,6 +69,45 @@ TRACE_EVENT(reclaim_retry_zone,
__entry->no_progress_loops,
__entry->wmark_check)
);
+
+#ifdef CONFIG_COMPACTION
+TRACE_EVENT(compact_retry,
+
+ TP_PROTO(int order,
+ enum compact_priority priority,
+ enum compact_result result,
+ int retries,
+ int max_retries,
+ bool ret),
+
+ TP_ARGS(order, priority, result, retries, max_retries, ret),
+
+ TP_STRUCT__entry(
+ __field( int, order)
+ __field( int, priority)
+ __field( int, result)
+ __field( int, retries)
+ __field( int, max_retries)
+ __field( bool, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->priority = priority;
+ __entry->result = result;
+ __entry->retries = retries;
+ __entry->max_retries = max_retries;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
+ __entry->order,
+ __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
+ __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
+ __entry->retries, __entry->max_retries,
+ __entry->ret)
+);
+#endif /* CONFIG_COMPACTION */
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23ca951a8380..150245328882 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3201,6 +3201,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
{
int max_retries = MAX_COMPACT_RETRIES;
int min_priority;
+ bool ret = false;
+ int retries = *compaction_retries;
+ enum compact_priority priority = *compact_priority;
if (!order)
return false;
@@ -3222,8 +3225,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
* But do not retry if the given zonelist is not suitable for
* compaction.
*/
- if (compaction_withdrawn(compact_result))
- return compaction_zonelist_suitable(ac, order, alloc_flags);
+ if (compaction_withdrawn(compact_result)) {
+ ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+ goto out;
+ }
/*
* !costly requests are much more important than __GFP_REPEAT
@@ -3235,8 +3240,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
*/
if (order > PAGE_ALLOC_COSTLY_ORDER)
max_retries /= 4;
- if (*compaction_retries <= max_retries)
- return true;
+ if (*compaction_retries <= max_retries) {
+ ret = true;
+ goto out;
+ }
/*
* Make sure there are attempts at the highest priority if we exhausted
@@ -3245,12 +3252,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
check_priority:
min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+
if (*compact_priority > min_priority) {
(*compact_priority)--;
*compaction_retries = 0;
- return true;
+ ret = true;
}
- return false;
+out:
+ trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
+ return ret;
}
#else
static inline struct page *
--
2.10.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
2016-12-14 14:53 ` Michal Hocko
@ 2016-12-14 17:28 ` Vlastimil Babka
-1 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:28 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
>
> Higher order requests oom debugging is currently quite hard. We do have
> some compaction points which can tell us how the compaction is operating
> but there is no trace point to tell us about compaction retry logic.
> This patch adds a one which will have the following format
>
> bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
>
> we can see that the order 9 request is not retried even though we are in
> the highest compaction priority mode becase the last compaction attempt
> was withdrawn. This means that compaction_zonelist_suitable must have
> returned false and there is no suitable zone to compact for this request
> and so no need to retry further.
>
> another example would be
> <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
>
> in this case the order-9 compaction failed to find any suitable
> block. We do not retry anymore because this is a costly request
> and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
> ---
> include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
> include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
> mm/page_alloc.c | 22 ++++++++++++++++------
> 3 files changed, 81 insertions(+), 6 deletions(-)
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 7e4cfede873c..aa4caa6914a9 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
> EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
> EMe(COMPACT_CONTENDED, "contended")
> +
> +/* High-level compaction status feedback */
> +#define COMPACTION_FAILED 1
> +#define COMPACTION_WITHDRAWN 2
> +#define COMPACTION_PROGRESS 3
> +
> +#define compact_result_to_feedback(result) \
> +({ \
> + enum compact_result __result = result; \
> + (compaction_failed(__result)) ? COMPACTION_FAILED : \
> + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
> +})
It seems you forgot to actually use this "function" (sorry, didn't notice
earlier) so currently it's translating enum compact_result directly into the
failed/withdrawn/progress strings, which is wrong.
The correct place for the result->feedback conversion should be TP_fast_assign,
so __entry->result should become __entry->feedback. It's too late in TP_printk,
as userspace tools (e.g. trace-cmd) won't know the functions that
compact_result_to_feedback() uses.
> +
> +#define COMPACTION_FEEDBACK \
> + EM(COMPACTION_FAILED, "failed") \
> + EM(COMPACTION_WITHDRAWN, "withdrawn") \
> + EMe(COMPACTION_PROGRESS, "progress")
> +
> +#define COMPACTION_PRIORITY \
> + EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
> + EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
> + EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
> #else
> #define COMPACTION_STATUS
> +#define COMPACTION_PRIORITY
> +#define COMPACTION_FEEDBACK
> #endif
>
> #ifdef CONFIG_ZONE_DMA
> @@ -226,6 +250,8 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> #define EMe(a, b) TRACE_DEFINE_ENUM(a);
>
> COMPACTION_STATUS
> +COMPACTION_PRIORITY
> +COMPACTION_FEEDBACK
> ZONE_TYPE
>
> /*
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 9160da7a26a0..e9d690665b7a 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -69,6 +69,45 @@ TRACE_EVENT(reclaim_retry_zone,
> __entry->no_progress_loops,
> __entry->wmark_check)
> );
> +
> +#ifdef CONFIG_COMPACTION
> +TRACE_EVENT(compact_retry,
> +
> + TP_PROTO(int order,
> + enum compact_priority priority,
> + enum compact_result result,
> + int retries,
> + int max_retries,
> + bool ret),
> +
> + TP_ARGS(order, priority, result, retries, max_retries, ret),
> +
> + TP_STRUCT__entry(
> + __field( int, order)
> + __field( int, priority)
> + __field( int, result)
> + __field( int, retries)
> + __field( int, max_retries)
> + __field( bool, ret)
> + ),
> +
> + TP_fast_assign(
> + __entry->order = order;
> + __entry->priority = priority;
> + __entry->result = result;
> + __entry->retries = retries;
> + __entry->max_retries = max_retries;
> + __entry->ret = ret;
> + ),
> +
> + TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
> + __entry->order,
> + __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
> + __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
> + __entry->retries, __entry->max_retries,
> + __entry->ret)
> +);
> +#endif /* CONFIG_COMPACTION */
> #endif
>
> /* This part must be outside protection */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 23ca951a8380..150245328882 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3201,6 +3201,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> {
> int max_retries = MAX_COMPACT_RETRIES;
> int min_priority;
> + bool ret = false;
> + int retries = *compaction_retries;
> + enum compact_priority priority = *compact_priority;
>
> if (!order)
> return false;
> @@ -3222,8 +3225,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> * But do not retry if the given zonelist is not suitable for
> * compaction.
> */
> - if (compaction_withdrawn(compact_result))
> - return compaction_zonelist_suitable(ac, order, alloc_flags);
> + if (compaction_withdrawn(compact_result)) {
> + ret = compaction_zonelist_suitable(ac, order, alloc_flags);
> + goto out;
> + }
>
> /*
> * !costly requests are much more important than __GFP_REPEAT
> @@ -3235,8 +3240,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> */
> if (order > PAGE_ALLOC_COSTLY_ORDER)
> max_retries /= 4;
> - if (*compaction_retries <= max_retries)
> - return true;
> + if (*compaction_retries <= max_retries) {
> + ret = true;
> + goto out;
> + }
>
> /*
> * Make sure there are attempts at the highest priority if we exhausted
> @@ -3245,12 +3252,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> check_priority:
> min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
> MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
> +
> if (*compact_priority > min_priority) {
> (*compact_priority)--;
> *compaction_retries = 0;
> - return true;
> + ret = true;
> }
> - return false;
> +out:
> + trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
> + return ret;
> }
> #else
> static inline struct page *
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
@ 2016-12-14 17:28 ` Vlastimil Babka
0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-14 17:28 UTC (permalink / raw)
To: Michal Hocko, Andrew Morton
Cc: David Rientjes, Johannes Weiner, linux-mm, LKML, Michal Hocko
On 12/14/2016 03:53 PM, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
>
> Higher order requests oom debugging is currently quite hard. We do have
> some compaction points which can tell us how the compaction is operating
> but there is no trace point to tell us about compaction retry logic.
> This patch adds a one which will have the following format
>
> bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
>
> we can see that the order 9 request is not retried even though we are in
> the highest compaction priority mode becase the last compaction attempt
> was withdrawn. This means that compaction_zonelist_suitable must have
> returned false and there is no suitable zone to compact for this request
> and so no need to retry further.
>
> another example would be
> <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
>
> in this case the order-9 compaction failed to find any suitable
> block. We do not retry anymore because this is a costly request
> and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
> ---
> include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
> include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
> mm/page_alloc.c | 22 ++++++++++++++++------
> 3 files changed, 81 insertions(+), 6 deletions(-)
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 7e4cfede873c..aa4caa6914a9 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
> EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
> EMe(COMPACT_CONTENDED, "contended")
> +
> +/* High-level compaction status feedback */
> +#define COMPACTION_FAILED 1
> +#define COMPACTION_WITHDRAWN 2
> +#define COMPACTION_PROGRESS 3
> +
> +#define compact_result_to_feedback(result) \
> +({ \
> + enum compact_result __result = result; \
> + (compaction_failed(__result)) ? COMPACTION_FAILED : \
> + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
> +})
It seems you forgot to actually use this "function" (sorry, didn't notice
earlier) so currently it's translating enum compact_result directly into the
failed/withdrawn/progress strings, which is wrong.
The correct place for the result->feedback conversion should be TP_fast_assign,
so __entry->result should become __entry->feedback. It's too late in TP_printk,
as userspace tools (e.g. trace-cmd) won't know the functions that
compact_result_to_feedback() uses.
> +
> +#define COMPACTION_FEEDBACK \
> + EM(COMPACTION_FAILED, "failed") \
> + EM(COMPACTION_WITHDRAWN, "withdrawn") \
> + EMe(COMPACTION_PROGRESS, "progress")
> +
> +#define COMPACTION_PRIORITY \
> + EM(COMPACT_PRIO_SYNC_FULL, "COMPACT_PRIO_SYNC_FULL") \
> + EM(COMPACT_PRIO_SYNC_LIGHT, "COMPACT_PRIO_SYNC_LIGHT") \
> + EMe(COMPACT_PRIO_ASYNC, "COMPACT_PRIO_ASYNC")
> #else
> #define COMPACTION_STATUS
> +#define COMPACTION_PRIORITY
> +#define COMPACTION_FEEDBACK
> #endif
>
> #ifdef CONFIG_ZONE_DMA
> @@ -226,6 +250,8 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> #define EMe(a, b) TRACE_DEFINE_ENUM(a);
>
> COMPACTION_STATUS
> +COMPACTION_PRIORITY
> +COMPACTION_FEEDBACK
> ZONE_TYPE
>
> /*
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 9160da7a26a0..e9d690665b7a 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -69,6 +69,45 @@ TRACE_EVENT(reclaim_retry_zone,
> __entry->no_progress_loops,
> __entry->wmark_check)
> );
> +
> +#ifdef CONFIG_COMPACTION
> +TRACE_EVENT(compact_retry,
> +
> + TP_PROTO(int order,
> + enum compact_priority priority,
> + enum compact_result result,
> + int retries,
> + int max_retries,
> + bool ret),
> +
> + TP_ARGS(order, priority, result, retries, max_retries, ret),
> +
> + TP_STRUCT__entry(
> + __field( int, order)
> + __field( int, priority)
> + __field( int, result)
> + __field( int, retries)
> + __field( int, max_retries)
> + __field( bool, ret)
> + ),
> +
> + TP_fast_assign(
> + __entry->order = order;
> + __entry->priority = priority;
> + __entry->result = result;
> + __entry->retries = retries;
> + __entry->max_retries = max_retries;
> + __entry->ret = ret;
> + ),
> +
> + TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
> + __entry->order,
> + __print_symbolic(__entry->priority, COMPACTION_PRIORITY),
> + __print_symbolic(__entry->result, COMPACTION_FEEDBACK),
> + __entry->retries, __entry->max_retries,
> + __entry->ret)
> +);
> +#endif /* CONFIG_COMPACTION */
> #endif
>
> /* This part must be outside protection */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 23ca951a8380..150245328882 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3201,6 +3201,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> {
> int max_retries = MAX_COMPACT_RETRIES;
> int min_priority;
> + bool ret = false;
> + int retries = *compaction_retries;
> + enum compact_priority priority = *compact_priority;
>
> if (!order)
> return false;
> @@ -3222,8 +3225,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> * But do not retry if the given zonelist is not suitable for
> * compaction.
> */
> - if (compaction_withdrawn(compact_result))
> - return compaction_zonelist_suitable(ac, order, alloc_flags);
> + if (compaction_withdrawn(compact_result)) {
> + ret = compaction_zonelist_suitable(ac, order, alloc_flags);
> + goto out;
> + }
>
> /*
> * !costly requests are much more important than __GFP_REPEAT
> @@ -3235,8 +3240,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> */
> if (order > PAGE_ALLOC_COSTLY_ORDER)
> max_retries /= 4;
> - if (*compaction_retries <= max_retries)
> - return true;
> + if (*compaction_retries <= max_retries) {
> + ret = true;
> + goto out;
> + }
>
> /*
> * Make sure there are attempts at the highest priority if we exhausted
> @@ -3245,12 +3252,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
> check_priority:
> min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
> MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
> +
> if (*compact_priority > min_priority) {
> (*compact_priority)--;
> *compaction_retries = 0;
> - return true;
> + ret = true;
> }
> - return false;
> +out:
> + trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
> + return ret;
> }
> #else
> static inline struct page *
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
2016-12-14 17:28 ` Vlastimil Babka
@ 2016-12-14 18:11 ` Michal Hocko
-1 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 18:11 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Andrew Morton, David Rientjes, Johannes Weiner, linux-mm, LKML
On Wed 14-12-16 18:28:38, Vlastimil Babka wrote:
> On 12/14/2016 03:53 PM, Michal Hocko wrote:
> > From: Michal Hocko <mhocko@suse.com>
> >
> > Higher order requests oom debugging is currently quite hard. We do have
> > some compaction points which can tell us how the compaction is operating
> > but there is no trace point to tell us about compaction retry logic.
> > This patch adds a one which will have the following format
> >
> > bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
> >
> > we can see that the order 9 request is not retried even though we are in
> > the highest compaction priority mode becase the last compaction attempt
> > was withdrawn. This means that compaction_zonelist_suitable must have
> > returned false and there is no suitable zone to compact for this request
> > and so no need to retry further.
> >
> > another example would be
> > <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
> >
> > in this case the order-9 compaction failed to find any suitable
> > block. We do not retry anymore because this is a costly request
> > and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
> >
> > Signed-off-by: Michal Hocko <mhocko@suse.com>
> > ---
> > include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
> > include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
> > mm/page_alloc.c | 22 ++++++++++++++++------
> > 3 files changed, 81 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> > index 7e4cfede873c..aa4caa6914a9 100644
> > --- a/include/trace/events/mmflags.h
> > +++ b/include/trace/events/mmflags.h
> > @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> > EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
> > EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
> > EMe(COMPACT_CONTENDED, "contended")
> > +
> > +/* High-level compaction status feedback */
> > +#define COMPACTION_FAILED 1
> > +#define COMPACTION_WITHDRAWN 2
> > +#define COMPACTION_PROGRESS 3
> > +
> > +#define compact_result_to_feedback(result) \
> > +({ \
> > + enum compact_result __result = result; \
> > + (compaction_failed(__result)) ? COMPACTION_FAILED : \
> > + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
> > +})
>
> It seems you forgot to actually use this "function" (sorry, didn't notice
> earlier) so currently it's translating enum compact_result directly into the
> failed/withdrawn/progress strings, which is wrong.
You are right. I've screwed while integrating the enum translation part.
> The correct place for the result->feedback conversion should be
> TP_fast_assign, so __entry->result should become __entry->feedback. It's too
> late in TP_printk, as userspace tools (e.g. trace-cmd) won't know the
> functions that compact_result_to_feedback() uses.
Thanks. The follow up fix should be
---
>From 82c9064698594d694b052d3847c905e90becc7c5 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 14 Dec 2016 19:09:10 +0100
Subject: [PATCH] fold me "oom, trace: add compaction retry tracepoint"
Do not forget to translate compact_result into a highlevel constants
as per Vlastimil
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
include/trace/events/oom.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index e9d690665b7a..38baeb27221a 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -94,7 +94,7 @@ TRACE_EVENT(compact_retry,
TP_fast_assign(
__entry->order = order;
__entry->priority = priority;
- __entry->result = result;
+ __entry->result = compact_result_to_feedback(result);
__entry->retries = retries;
__entry->max_retries = max_retries;
__entry->ret = ret;
--
2.10.2
--
Michal Hocko
SUSE Labs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
@ 2016-12-14 18:11 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-14 18:11 UTC (permalink / raw)
To: Vlastimil Babka
Cc: Andrew Morton, David Rientjes, Johannes Weiner, linux-mm, LKML
On Wed 14-12-16 18:28:38, Vlastimil Babka wrote:
> On 12/14/2016 03:53 PM, Michal Hocko wrote:
> > From: Michal Hocko <mhocko@suse.com>
> >
> > Higher order requests oom debugging is currently quite hard. We do have
> > some compaction points which can tell us how the compaction is operating
> > but there is no trace point to tell us about compaction retry logic.
> > This patch adds a one which will have the following format
> >
> > bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
> >
> > we can see that the order 9 request is not retried even though we are in
> > the highest compaction priority mode becase the last compaction attempt
> > was withdrawn. This means that compaction_zonelist_suitable must have
> > returned false and there is no suitable zone to compact for this request
> > and so no need to retry further.
> >
> > another example would be
> > <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
> >
> > in this case the order-9 compaction failed to find any suitable
> > block. We do not retry anymore because this is a costly request
> > and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
> >
> > Signed-off-by: Michal Hocko <mhocko@suse.com>
> > ---
> > include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
> > include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
> > mm/page_alloc.c | 22 ++++++++++++++++------
> > 3 files changed, 81 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> > index 7e4cfede873c..aa4caa6914a9 100644
> > --- a/include/trace/events/mmflags.h
> > +++ b/include/trace/events/mmflags.h
> > @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
> > EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
> > EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
> > EMe(COMPACT_CONTENDED, "contended")
> > +
> > +/* High-level compaction status feedback */
> > +#define COMPACTION_FAILED 1
> > +#define COMPACTION_WITHDRAWN 2
> > +#define COMPACTION_PROGRESS 3
> > +
> > +#define compact_result_to_feedback(result) \
> > +({ \
> > + enum compact_result __result = result; \
> > + (compaction_failed(__result)) ? COMPACTION_FAILED : \
> > + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
> > +})
>
> It seems you forgot to actually use this "function" (sorry, didn't notice
> earlier) so currently it's translating enum compact_result directly into the
> failed/withdrawn/progress strings, which is wrong.
You are right. I've screwed while integrating the enum translation part.
> The correct place for the result->feedback conversion should be
> TP_fast_assign, so __entry->result should become __entry->feedback. It's too
> late in TP_printk, as userspace tools (e.g. trace-cmd) won't know the
> functions that compact_result_to_feedback() uses.
Thanks. The follow up fix should be
---
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
2016-12-14 18:11 ` Michal Hocko
@ 2016-12-15 8:18 ` Vlastimil Babka
-1 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-15 8:18 UTC (permalink / raw)
To: Michal Hocko
Cc: Andrew Morton, David Rientjes, Johannes Weiner, linux-mm, LKML
On 12/14/2016 07:11 PM, Michal Hocko wrote:
> On Wed 14-12-16 18:28:38, Vlastimil Babka wrote:
>> On 12/14/2016 03:53 PM, Michal Hocko wrote:
>>> From: Michal Hocko <mhocko@suse.com>
>>>
>>> Higher order requests oom debugging is currently quite hard. We do have
>>> some compaction points which can tell us how the compaction is operating
>>> but there is no trace point to tell us about compaction retry logic.
>>> This patch adds a one which will have the following format
>>>
>>> bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
>>>
>>> we can see that the order 9 request is not retried even though we are in
>>> the highest compaction priority mode becase the last compaction attempt
>>> was withdrawn. This means that compaction_zonelist_suitable must have
>>> returned false and there is no suitable zone to compact for this request
>>> and so no need to retry further.
>>>
>>> another example would be
>>> <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
>>>
>>> in this case the order-9 compaction failed to find any suitable
>>> block. We do not retry anymore because this is a costly request
>>> and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
>>>
>>> Signed-off-by: Michal Hocko <mhocko@suse.com>
>>> ---
>>> include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
>>> include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
>>> mm/page_alloc.c | 22 ++++++++++++++++------
>>> 3 files changed, 81 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>>> index 7e4cfede873c..aa4caa6914a9 100644
>>> --- a/include/trace/events/mmflags.h
>>> +++ b/include/trace/events/mmflags.h
>>> @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
>>> EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
>>> EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
>>> EMe(COMPACT_CONTENDED, "contended")
>>> +
>>> +/* High-level compaction status feedback */
>>> +#define COMPACTION_FAILED 1
>>> +#define COMPACTION_WITHDRAWN 2
>>> +#define COMPACTION_PROGRESS 3
>>> +
>>> +#define compact_result_to_feedback(result) \
>>> +({ \
>>> + enum compact_result __result = result; \
>>> + (compaction_failed(__result)) ? COMPACTION_FAILED : \
>>> + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
>>> +})
>>
>> It seems you forgot to actually use this "function" (sorry, didn't notice
>> earlier) so currently it's translating enum compact_result directly into the
>> failed/withdrawn/progress strings, which is wrong.
>
> You are right. I've screwed while integrating the enum translation part.
>
>> The correct place for the result->feedback conversion should be
>> TP_fast_assign, so __entry->result should become __entry->feedback. It's too
>> late in TP_printk, as userspace tools (e.g. trace-cmd) won't know the
>> functions that compact_result_to_feedback() uses.
>
> Thanks. The follow up fix should be
> ---
> From 82c9064698594d694b052d3847c905e90becc7c5 Mon Sep 17 00:00:00 2001
> From: Michal Hocko <mhocko@suse.com>
> Date: Wed, 14 Dec 2016 19:09:10 +0100
> Subject: [PATCH] fold me "oom, trace: add compaction retry tracepoint"
>
> Do not forget to translate compact_result into a highlevel constants
> as per Vlastimil
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
OK, with that:
Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> include/trace/events/oom.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index e9d690665b7a..38baeb27221a 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -94,7 +94,7 @@ TRACE_EVENT(compact_retry,
> TP_fast_assign(
> __entry->order = order;
> __entry->priority = priority;
> - __entry->result = result;
> + __entry->result = compact_result_to_feedback(result);
> __entry->retries = retries;
> __entry->max_retries = max_retries;
> __entry->ret = ret;
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/3] oom, trace: add compaction retry tracepoint
@ 2016-12-15 8:18 ` Vlastimil Babka
0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2016-12-15 8:18 UTC (permalink / raw)
To: Michal Hocko
Cc: Andrew Morton, David Rientjes, Johannes Weiner, linux-mm, LKML
On 12/14/2016 07:11 PM, Michal Hocko wrote:
> On Wed 14-12-16 18:28:38, Vlastimil Babka wrote:
>> On 12/14/2016 03:53 PM, Michal Hocko wrote:
>>> From: Michal Hocko <mhocko@suse.com>
>>>
>>> Higher order requests oom debugging is currently quite hard. We do have
>>> some compaction points which can tell us how the compaction is operating
>>> but there is no trace point to tell us about compaction retry logic.
>>> This patch adds a one which will have the following format
>>>
>>> bash-3126 [001] .... 1498.220001: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=withdrawn retries=0 max_retries=16 should_retry=0
>>>
>>> we can see that the order 9 request is not retried even though we are in
>>> the highest compaction priority mode becase the last compaction attempt
>>> was withdrawn. This means that compaction_zonelist_suitable must have
>>> returned false and there is no suitable zone to compact for this request
>>> and so no need to retry further.
>>>
>>> another example would be
>>> <...>-3137 [001] .... 81.501689: compact_retry: order=9 priority=COMPACT_PRIO_SYNC_LIGHT compaction_result=failed retries=0 max_retries=16 should_retry=0
>>>
>>> in this case the order-9 compaction failed to find any suitable
>>> block. We do not retry anymore because this is a costly request
>>> and those do not go below COMPACT_PRIO_SYNC_LIGHT priority.
>>>
>>> Signed-off-by: Michal Hocko <mhocko@suse.com>
>>> ---
>>> include/trace/events/mmflags.h | 26 ++++++++++++++++++++++++++
>>> include/trace/events/oom.h | 39 +++++++++++++++++++++++++++++++++++++++
>>> mm/page_alloc.c | 22 ++++++++++++++++------
>>> 3 files changed, 81 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
>>> index 7e4cfede873c..aa4caa6914a9 100644
>>> --- a/include/trace/events/mmflags.h
>>> +++ b/include/trace/events/mmflags.h
>>> @@ -187,8 +187,32 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
>>> EM( COMPACT_NO_SUITABLE_PAGE, "no_suitable_page") \
>>> EM( COMPACT_NOT_SUITABLE_ZONE, "not_suitable_zone") \
>>> EMe(COMPACT_CONTENDED, "contended")
>>> +
>>> +/* High-level compaction status feedback */
>>> +#define COMPACTION_FAILED 1
>>> +#define COMPACTION_WITHDRAWN 2
>>> +#define COMPACTION_PROGRESS 3
>>> +
>>> +#define compact_result_to_feedback(result) \
>>> +({ \
>>> + enum compact_result __result = result; \
>>> + (compaction_failed(__result)) ? COMPACTION_FAILED : \
>>> + (compaction_withdrawn(__result)) ? COMPACTION_WITHDRAWN : COMPACTION_PROGRESS; \
>>> +})
>>
>> It seems you forgot to actually use this "function" (sorry, didn't notice
>> earlier) so currently it's translating enum compact_result directly into the
>> failed/withdrawn/progress strings, which is wrong.
>
> You are right. I've screwed while integrating the enum translation part.
>
>> The correct place for the result->feedback conversion should be
>> TP_fast_assign, so __entry->result should become __entry->feedback. It's too
>> late in TP_printk, as userspace tools (e.g. trace-cmd) won't know the
>> functions that compact_result_to_feedback() uses.
>
> Thanks. The follow up fix should be
> ---
> From 82c9064698594d694b052d3847c905e90becc7c5 Mon Sep 17 00:00:00 2001
> From: Michal Hocko <mhocko@suse.com>
> Date: Wed, 14 Dec 2016 19:09:10 +0100
> Subject: [PATCH] fold me "oom, trace: add compaction retry tracepoint"
>
> Do not forget to translate compact_result into a highlevel constants
> as per Vlastimil
>
> Signed-off-by: Michal Hocko <mhocko@suse.com>
OK, with that:
Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> include/trace/events/oom.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index e9d690665b7a..38baeb27221a 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -94,7 +94,7 @@ TRACE_EVENT(compact_retry,
> TP_fast_assign(
> __entry->order = order;
> __entry->priority = priority;
> - __entry->result = result;
> + __entry->result = compact_result_to_feedback(result);
> __entry->retries = retries;
> __entry->max_retries = max_retries;
> __entry->ret = ret;
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/3] oom, trace: Add oom detection tracepoints
2016-12-20 13:01 [PATCH 0/3 v2] mm, oom: add oom detection tracepoints Michal Hocko
@ 2016-12-20 13:01 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-20 13:01 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
should_reclaim_retry is the central decision point for declaring the
OOM. It might be really useful to expose data used for this decision
making when debugging an unexpected oom situations.
Say we have an OOM report:
[ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
[ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
Now we can check the tracepoint data to see how we have ended up in this
situation:
mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>From the above we can quickly deduce that the reclaim stopped making
any progress (see no_progress_loops increased in each round) and while
there were still some 51 reclaimable pages they couldn't be dropped
for some reason (vmscan trace points would tell us more about that
part). available will represent reclaimable + free_pages scaled down per
no_progress_loops factor. This is essentially an optimistic estimate of
how much memory we would have when reclaiming everything. This can be
compared to min_wmark to get a rought idea but the wmark_check tells the
result of the watermark check which is more precise (includes lowmem
reserves, considers the order etc.). As we can see no zone is eligible
in the end and that is why we have triggered the oom in this situation.
Please note that higher order requests might fail on the wmark_check even
when there is much more memory available than min_wmark - e.g. when the
memory is fragmented. A follow up tracepoint will help to debug those
situations.
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
include/trace/events/oom.h | 42 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 10 ++++++++--
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..9160da7a26a0 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,47 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1c24112308d6..7d11eccd78d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,7 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
@@ -3479,6 +3480,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3489,8 +3492,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
--
2.10.2
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 2/3] oom, trace: Add oom detection tracepoints
@ 2016-12-20 13:01 ` Michal Hocko
0 siblings, 0 replies; 23+ messages in thread
From: Michal Hocko @ 2016-12-20 13:01 UTC (permalink / raw)
To: Andrew Morton
Cc: Vlastimil Babka, David Rientjes, Johannes Weiner, linux-mm, LKML,
Michal Hocko
From: Michal Hocko <mhocko@suse.com>
should_reclaim_retry is the central decision point for declaring the
OOM. It might be really useful to expose data used for this decision
making when debugging an unexpected oom situations.
Say we have an OOM report:
[ 52.264001] mem_eater invoked oom-killer: gfp_mask=0x24280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=0, order=0, oom_score_adj=0
[ 52.267549] CPU: 3 PID: 3148 Comm: mem_eater Tainted: G W 4.8.0-oomtrace3-00006-gb21338b386d2 #1024
Now we can check the tracepoint data to see how we have ended up in this
situation:
mem_eater-3148 [003] .... 52.432801: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11134 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433269: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11103 min_wmark=11084 no_progress_loops=1 wmark_check=1
mem_eater-3148 [003] .... 52.433712: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11100 min_wmark=11084 no_progress_loops=2 wmark_check=1
mem_eater-3148 [003] .... 52.434067: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11097 min_wmark=11084 no_progress_loops=3 wmark_check=1
mem_eater-3148 [003] .... 52.434414: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11094 min_wmark=11084 no_progress_loops=4 wmark_check=1
mem_eater-3148 [003] .... 52.434761: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11091 min_wmark=11084 no_progress_loops=5 wmark_check=1
mem_eater-3148 [003] .... 52.435108: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11087 min_wmark=11084 no_progress_loops=6 wmark_check=1
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA32 order=0 reclaimable=51 available=11084 min_wmark=11084 no_progress_loops=7 wmark_check=0
mem_eater-3148 [003] .... 52.435478: reclaim_retry_zone: node=0 zone=DMA order=0 reclaimable=0 available=1126 min_wmark=179 no_progress_loops=7 wmark_check=0
>From the above we can quickly deduce that the reclaim stopped making
any progress (see no_progress_loops increased in each round) and while
there were still some 51 reclaimable pages they couldn't be dropped
for some reason (vmscan trace points would tell us more about that
part). available will represent reclaimable + free_pages scaled down per
no_progress_loops factor. This is essentially an optimistic estimate of
how much memory we would have when reclaiming everything. This can be
compared to min_wmark to get a rought idea but the wmark_check tells the
result of the watermark check which is more precise (includes lowmem
reserves, considers the order etc.). As we can see no zone is eligible
in the end and that is why we have triggered the oom in this situation.
Please note that higher order requests might fail on the wmark_check even
when there is much more memory available than min_wmark - e.g. when the
memory is fragmented. A follow up tracepoint will help to debug those
situations.
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
include/trace/events/oom.h | 42 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 10 ++++++++--
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 1e974983757e..9160da7a26a0 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -4,6 +4,7 @@
#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
TRACE_EVENT(oom_score_adj_update,
@@ -27,6 +28,47 @@ TRACE_EVENT(oom_score_adj_update,
__entry->pid, __entry->comm, __entry->oom_score_adj)
);
+TRACE_EVENT(reclaim_retry_zone,
+
+ TP_PROTO(struct zoneref *zoneref,
+ int order,
+ unsigned long reclaimable,
+ unsigned long available,
+ unsigned long min_wmark,
+ int no_progress_loops,
+ bool wmark_check),
+
+ TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),
+
+ TP_STRUCT__entry(
+ __field( int, node)
+ __field( int, zone_idx)
+ __field( int, order)
+ __field( unsigned long, reclaimable)
+ __field( unsigned long, available)
+ __field( unsigned long, min_wmark)
+ __field( int, no_progress_loops)
+ __field( bool, wmark_check)
+ ),
+
+ TP_fast_assign(
+ __entry->node = zone_to_nid(zoneref->zone);
+ __entry->zone_idx = zoneref->zone_idx;
+ __entry->order = order;
+ __entry->reclaimable = reclaimable;
+ __entry->available = available;
+ __entry->min_wmark = min_wmark;
+ __entry->no_progress_loops = no_progress_loops;
+ __entry->wmark_check = wmark_check;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
+ __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
+ __entry->order,
+ __entry->reclaimable, __entry->available, __entry->min_wmark,
+ __entry->no_progress_loops,
+ __entry->wmark_check)
+);
#endif
/* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1c24112308d6..7d11eccd78d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,7 @@
#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
+#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
@@ -3479,6 +3480,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
ac->nodemask) {
unsigned long available;
unsigned long reclaimable;
+ unsigned long min_wmark = min_wmark_pages(zone);
+ bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
available -= DIV_ROUND_UP((*no_progress_loops) * available,
@@ -3489,8 +3492,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
* Would the allocation succeed if we reclaimed the whole
* available?
*/
- if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags, available)) {
+ wmark = __zone_watermark_ok(zone, order, min_wmark,
+ ac_classzone_idx(ac), alloc_flags, available);
+ trace_reclaim_retry_zone(z, order, reclaimable,
+ available, min_wmark, *no_progress_loops, wmark);
+ if (wmark) {
/*
* If we didn't make any progress and have a lot of
* dirty + writeback pages then we should wait for
--
2.10.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 23+ messages in thread