* [PATCH] Add slowpath enter/exit trace events
@ 2017-11-23 10:43 ` peter.enderborg
  0 siblings, 0 replies; 26+ messages in thread
From: peter.enderborg @ 2017-11-23 10:43 UTC (permalink / raw)
  To: Michal Hocko, linux-kernel, linux-mm
  Cc: Steven Rostedt, Ingo Molnar, Alex Deucher, David S . Miller,
	Harry Wentland, Greg Kroah-Hartman, Tony Cheng, Peter Enderborg,
	Andrew Morton, Vlastimil Babka, Mel Gorman, Johannes Weiner,
	Pavel Tatashin

From: Peter Enderborg <peter.enderborg@sony.com>

The warning for slow allocations has been removed; this is
another way to fetch that information, but you need to
enable the trace. The exit event also reports the number of
retries, how long the allocation stalled, and the failure
reason if the allocation failed.
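
For example, the events can be enabled through tracefs in the
usual way (assuming the standard mount point):

  echo 1 > /sys/kernel/debug/tracing/events/kmem/mm_page_alloc_slowpath_enter/enable
  echo 1 > /sys/kernel/debug/tracing/events/kmem/mm_page_alloc_slowpath_exit/enable
  cat /sys/kernel/debug/tracing/trace_pipe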

Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
---
 include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c             | 62 +++++++++++++++++++++++++++++++----------
 2 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e30..bb882ca 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 		__entry->change_ownership)
 );
 
+TRACE_EVENT(mm_page_alloc_slowpath_enter,
+
+	TP_PROTO(int alloc_order,
+		nodemask_t *nodemask,
+		gfp_t gfp_flags),
+
+	TP_ARGS(alloc_order, nodemask, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(int, alloc_order)
+		__field(nodemask_t *, nodemask)
+		__field(gfp_t, gfp_flags)
+	 ),
+
+	 TP_fast_assign(
+		__entry->alloc_order		= alloc_order;
+		__entry->nodemask		= nodemask;
+		__entry->gfp_flags		= gfp_flags;
+	 ),
+
+	 TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
+		__entry->alloc_order,
+		nodemask_pr_args(__entry->nodemask),
+		show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(mm_page_alloc_slowpath_exit,
+
+	TP_PROTO(struct page *page,
+		int alloc_order,
+		nodemask_t *nodemask,
+		u64 alloc_start,
+		gfp_t gfp_flags,
+		int retrys,
+		int exit),
+
+	TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
+		retrys, exit),
+
+	TP_STRUCT__entry(__field(struct page *, page)
+		__field(int, alloc_order)
+		__field(nodemask_t *, nodemask)
+		__field(u64, msdelay)
+		__field(gfp_t, gfp_flags)
+		__field(int, retrys)
+		__field(int, exit)
+	),
+
+	TP_fast_assign(
+		__entry->page	     = page;
+		__entry->alloc_order = alloc_order;
+		__entry->nodemask    = nodemask;
+		__entry->msdelay     = jiffies_to_msecs(jiffies-alloc_start);
+		__entry->gfp_flags   = gfp_flags;
+		__entry->retrys	     = retrys;
+		__entry->exit	     = exit;
+	),
+
+	TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
+		__entry->page,
+		__entry->alloc_order,
+		nodemask_pr_args(__entry->nodemask),
+		__entry->msdelay,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->retrys,
+		__entry->exit)
+);
+
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48b5b01..bae9cb9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
 volatile unsigned long latent_entropy __latent_entropy;
 EXPORT_SYMBOL(latent_entropy);
 #endif
+enum slowpath_exit {
+	SLOWPATH_NOZONE = -16,
+	SLOWPATH_COMPACT_DEFERRED,
+	SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
+	SLOWPATH_RECURSION,
+	SLOWPATH_NO_RETRY,
+	SLOWPATH_COSTLY_ORDER,
+	SLOWPATH_OOM_VICTIM,
+	SLOWPATH_NO_DIRECT_RECLAIM,
+	SLOWPATH_ORDER
+};
 
 /*
  * Array of node states.
@@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	enum compact_result compact_result;
 	int compaction_retries;
 	int no_progress_loops;
+	unsigned long alloc_start = jiffies;
 	unsigned int cpuset_mems_cookie;
 	int reserve_flags;
+	enum slowpath_exit slowpath_exit;
+	int retry_count = 0;
+
+	trace_mm_page_alloc_slowpath_enter(order,
+		ac->nodemask,
+		gfp_mask);
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	if (order >= MAX_ORDER) {
 		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
-		return NULL;
+		slowpath_exit = SLOWPATH_ORDER;
+		goto fail;
 	}
 
 	/*
@@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
 					ac->high_zoneidx, ac->nodemask);
-	if (!ac->preferred_zoneref->zone)
+	if (!ac->preferred_zoneref->zone) {
+		slowpath_exit = SLOWPATH_NOZONE;
 		goto nopage;
+	}
 
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
@@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 			 * system, so we fail the allocation instead of entering
 			 * direct reclaim.
 			 */
-			if (compact_result == COMPACT_DEFERRED)
+			if (compact_result == COMPACT_DEFERRED) {
+				slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
 				goto nopage;
+			}
 
 			/*
 			 * Looks like reclaim/compaction is worth trying, but
@@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 retry:
+	retry_count++;
 	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
 	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
@@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto got_pg;
 
 	/* Caller is not willing to reclaim, we can't balance anything */
-	if (!can_direct_reclaim)
+	if (!can_direct_reclaim) {
+		slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
 		goto nopage;
+	}
 
 	/* Avoid recursion of direct reclaim */
-	if (current->flags & PF_MEMALLOC)
+	if (current->flags & PF_MEMALLOC) {
+		slowpath_exit = SLOWPATH_RECURSION;
 		goto nopage;
-
+	}
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
 							&did_some_progress);
@@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto got_pg;
 
 	/* Do not loop if specifically requested */
-	if (gfp_mask & __GFP_NORETRY)
+	if (gfp_mask & __GFP_NORETRY) {
+		slowpath_exit = SLOWPATH_NO_RETRY;
 		goto nopage;
-
+	}
 	/*
 	 * Do not retry costly high order allocations unless they are
 	 * __GFP_RETRY_MAYFAIL
 	 */
-	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+		slowpath_exit = SLOWPATH_COSTLY_ORDER;
 		goto nopage;
-
+	}
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
 				 did_some_progress > 0, &no_progress_loops))
 		goto retry;
@@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	/* Avoid allocations with no watermarks from looping endlessly */
 	if (tsk_is_oom_victim(current) &&
 	    (alloc_flags == ALLOC_OOM ||
-	     (gfp_mask & __GFP_NOMEMALLOC)))
+	     (gfp_mask & __GFP_NOMEMALLOC))) {
+		slowpath_exit = SLOWPATH_OOM_VICTIM;
 		goto nopage;
-
+	}
 	/* Retry as long as the OOM killer is making progress */
 	if (did_some_progress) {
 		no_progress_loops = 0;
@@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * All existing users of the __GFP_NOFAIL are blockable, so warn
 		 * of any new users that actually require GFP_NOWAIT
 		 */
-		if (WARN_ON_ONCE(!can_direct_reclaim))
+		if (WARN_ON_ONCE(!can_direct_reclaim)) {
+			slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
 			goto fail;
-
+		}
 		/*
 		 * PF_MEMALLOC request from this context is rather bizarre
 		 * because we cannot reclaim anything and only can loop waiting
@@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	warn_alloc(gfp_mask, ac->nodemask,
 			"page allocation failure: order:%u", order);
 got_pg:
+	trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
+		alloc_start, gfp_mask, retry_count, slowpath_exit);
+
 	return page;
 }
 
-- 
2.7.4


* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 10:43 ` peter.enderborg
@ 2017-11-23 12:25   ` Michal Hocko
  -1 siblings, 0 replies; 26+ messages in thread
From: Michal Hocko @ 2017-11-23 12:25 UTC (permalink / raw)
  To: peter.enderborg
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Mel Gorman, Johannes Weiner, Pavel Tatashin

On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
> From: Peter Enderborg <peter.enderborg@sony.com>
> 
> The warning for slow allocations has been removed; this is
> another way to fetch that information, but you need to
> enable the trace. The exit event also reports the number of
> retries, how long the allocation stalled, and the failure
> reason if the allocation failed.

I think this is just too excessive. We already have a tracepoint for the
allocation exit. All we need is an entry to have a base to compare with.
Another use case would be to measure allocation latency. The information
you are adding can be (partially) covered by existing tracepoints.
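
For reference, a minimal entry-only event along those lines might look
roughly like the sketch below (the field selection is just a guess; the
existing mm_page_alloc tracepoint already reports the page, order and gfp
flags on the exit side, so latency can be taken as the delta between the
two):

TRACE_EVENT(mm_page_alloc_slowpath_enter,

	TP_PROTO(unsigned int order, gfp_t gfp_flags),

	TP_ARGS(order, gfp_flags),

	TP_STRUCT__entry(
		__field(unsigned int, order)
		__field(gfp_t, gfp_flags)
	),

	TP_fast_assign(
		__entry->order	   = order;
		__entry->gfp_flags = gfp_flags;
	),

	TP_printk("order=%u gfp_flags=%s",
		__entry->order,
		show_gfp_flags(__entry->gfp_flags))
);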

> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
> ---
>  include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/page_alloc.c             | 62 +++++++++++++++++++++++++++++++----------
>  2 files changed, 116 insertions(+), 14 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index eb57e30..bb882ca 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
>  		__entry->change_ownership)
>  );
>  
> +TRACE_EVENT(mm_page_alloc_slowpath_enter,
> +
> +	TP_PROTO(int alloc_order,
> +		nodemask_t *nodemask,
> +		gfp_t gfp_flags),
> +
> +	TP_ARGS(alloc_order, nodemask, gfp_flags),
> +
> +	TP_STRUCT__entry(
> +		__field(int, alloc_order)
> +		__field(nodemask_t *, nodemask)
> +		__field(gfp_t, gfp_flags)
> +	 ),
> +
> +	 TP_fast_assign(
> +		__entry->alloc_order		= alloc_order;
> +		__entry->nodemask		= nodemask;
> +		__entry->gfp_flags		= gfp_flags;
> +	 ),
> +
> +	 TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
> +		__entry->alloc_order,
> +		nodemask_pr_args(__entry->nodemask),
> +		show_gfp_flags(__entry->gfp_flags))
> +);
> +
> +TRACE_EVENT(mm_page_alloc_slowpath_exit,
> +
> +	TP_PROTO(struct page *page,
> +		int alloc_order,
> +		nodemask_t *nodemask,
> +		u64 alloc_start,
> +		gfp_t gfp_flags,
> +		int retrys,
> +		int exit),
> +
> +	TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
> +		retrys, exit),
> +
> +	TP_STRUCT__entry(__field(struct page *, page)
> +		__field(int, alloc_order)
> +		__field(nodemask_t *, nodemask)
> +		__field(u64, msdelay)
> +		__field(gfp_t, gfp_flags)
> +		__field(int, retrys)
> +		__field(int, exit)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->page	     = page;
> +		__entry->alloc_order = alloc_order;
> +		__entry->nodemask    = nodemask;
> +		__entry->msdelay     = jiffies_to_msecs(jiffies-alloc_start);
> +		__entry->gfp_flags   = gfp_flags;
> +		__entry->retrys	     = retrys;
> +		__entry->exit	     = exit;
> +	),
> +
> +	TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
> +		__entry->page,
> +		__entry->alloc_order,
> +		nodemask_pr_args(__entry->nodemask),
> +		__entry->msdelay,
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->retrys,
> +		__entry->exit)
> +);
> +
>  #endif /* _TRACE_KMEM_H */
>  
>  /* This part must be outside protection */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 48b5b01..bae9cb9 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>  volatile unsigned long latent_entropy __latent_entropy;
>  EXPORT_SYMBOL(latent_entropy);
>  #endif
> +enum slowpath_exit {
> +	SLOWPATH_NOZONE = -16,
> +	SLOWPATH_COMPACT_DEFERRED,
> +	SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
> +	SLOWPATH_RECURSION,
> +	SLOWPATH_NO_RETRY,
> +	SLOWPATH_COSTLY_ORDER,
> +	SLOWPATH_OOM_VICTIM,
> +	SLOWPATH_NO_DIRECT_RECLAIM,
> +	SLOWPATH_ORDER
> +};
>  
>  /*
>   * Array of node states.
> @@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	enum compact_result compact_result;
>  	int compaction_retries;
>  	int no_progress_loops;
> +	unsigned long alloc_start = jiffies;
>  	unsigned int cpuset_mems_cookie;
>  	int reserve_flags;
> +	enum slowpath_exit slowpath_exit;
> +	int retry_count = 0;
> +
> +	trace_mm_page_alloc_slowpath_enter(order,
> +		ac->nodemask,
> +		gfp_mask);
>  
>  	/*
>  	 * In the slowpath, we sanity check order to avoid ever trying to
> @@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 */
>  	if (order >= MAX_ORDER) {
>  		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
> -		return NULL;
> +		slowpath_exit = SLOWPATH_ORDER;
> +		goto fail;
>  	}
>  
>  	/*
> @@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 */
>  	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
>  					ac->high_zoneidx, ac->nodemask);
> -	if (!ac->preferred_zoneref->zone)
> +	if (!ac->preferred_zoneref->zone) {
> +		slowpath_exit = SLOWPATH_NOZONE;
>  		goto nopage;
> +	}
>  
>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>  		wake_all_kswapds(order, ac);
> @@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  			 * system, so we fail the allocation instead of entering
>  			 * direct reclaim.
>  			 */
> -			if (compact_result == COMPACT_DEFERRED)
> +			if (compact_result == COMPACT_DEFERRED) {
> +				slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
>  				goto nopage;
> +			}
>  
>  			/*
>  			 * Looks like reclaim/compaction is worth trying, but
> @@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	}
>  
>  retry:
> +	retry_count++;
>  	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>  		wake_all_kswapds(order, ac);
> @@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  		goto got_pg;
>  
>  	/* Caller is not willing to reclaim, we can't balance anything */
> -	if (!can_direct_reclaim)
> +	if (!can_direct_reclaim) {
> +		slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
>  		goto nopage;
> +	}
>  
>  	/* Avoid recursion of direct reclaim */
> -	if (current->flags & PF_MEMALLOC)
> +	if (current->flags & PF_MEMALLOC) {
> +		slowpath_exit = SLOWPATH_RECURSION;
>  		goto nopage;
> -
> +	}
>  	/* Try direct reclaim and then allocating */
>  	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
>  							&did_some_progress);
> @@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  		goto got_pg;
>  
>  	/* Do not loop if specifically requested */
> -	if (gfp_mask & __GFP_NORETRY)
> +	if (gfp_mask & __GFP_NORETRY) {
> +		slowpath_exit = SLOWPATH_NO_RETRY;
>  		goto nopage;
> -
> +	}
>  	/*
>  	 * Do not retry costly high order allocations unless they are
>  	 * __GFP_RETRY_MAYFAIL
>  	 */
> -	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
> +	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
> +		slowpath_exit = SLOWPATH_COSTLY_ORDER;
>  		goto nopage;
> -
> +	}
>  	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
>  				 did_some_progress > 0, &no_progress_loops))
>  		goto retry;
> @@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	/* Avoid allocations with no watermarks from looping endlessly */
>  	if (tsk_is_oom_victim(current) &&
>  	    (alloc_flags == ALLOC_OOM ||
> -	     (gfp_mask & __GFP_NOMEMALLOC)))
> +	     (gfp_mask & __GFP_NOMEMALLOC))) {
> +		slowpath_exit = SLOWPATH_OOM_VICTIM;
>  		goto nopage;
> -
> +	}
>  	/* Retry as long as the OOM killer is making progress */
>  	if (did_some_progress) {
>  		no_progress_loops = 0;
> @@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  		 * All existing users of the __GFP_NOFAIL are blockable, so warn
>  		 * of any new users that actually require GFP_NOWAIT
>  		 */
> -		if (WARN_ON_ONCE(!can_direct_reclaim))
> +		if (WARN_ON_ONCE(!can_direct_reclaim)) {
> +			slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
>  			goto fail;
> -
> +		}
>  		/*
>  		 * PF_MEMALLOC request from this context is rather bizarre
>  		 * because we cannot reclaim anything and only can loop waiting
> @@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	warn_alloc(gfp_mask, ac->nodemask,
>  			"page allocation failure: order:%u", order);
>  got_pg:
> +	trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
> +		alloc_start, gfp_mask, retry_count, slowpath_exit);
> +
>  	return page;
>  }
>  
> -- 
> 2.7.4
> 

-- 
Michal Hocko
SUSE Labs


* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 12:25   ` Michal Hocko
@ 2017-11-23 12:35     ` peter enderborg
  -1 siblings, 0 replies; 26+ messages in thread
From: peter enderborg @ 2017-11-23 12:35 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Mel Gorman, Johannes Weiner, Pavel Tatashin

Monitoring both enter/exit for all allocations and tracking down the ones
that are slow would put a very big load on mobile or embedded devices,
consuming a lot of battery and CPU. With this we can do useful monitoring
on devices in our field tests with real usage.
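
With the exit event alone plus an event filter the overhead stays low in
such a setup, something like (just a sketch, the threshold in ms is
arbitrary):

  echo 'msdelay > 100' > /sys/kernel/debug/tracing/events/kmem/mm_page_alloc_slowpath_exit/filter
  echo 1 > /sys/kernel/debug/tracing/events/kmem/mm_page_alloc_slowpath_exit/enable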

On 11/23/2017 01:25 PM, Michal Hocko wrote:
> On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
>> From: Peter Enderborg <peter.enderborg@sony.com>
>>
>> The warning for slow allocations has been removed; this is
>> another way to fetch that information, but you need to
>> enable the trace. The exit event also reports the number of
>> retries, how long the allocation stalled, and the failure
>> reason if the allocation failed.
> I think this is just too excessive. We already have a tracepoint for the
> allocation exit. All we need is an entry to have a base to compare with.
> Another use case would be to measure allocation latency. The information
> you are adding can be (partially) covered by existing tracepoints.
>
>> Signed-off-by: Peter Enderborg <peter.enderborg@sony.com>
>> ---
>>  include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
>>  mm/page_alloc.c             | 62 +++++++++++++++++++++++++++++++----------
>>  2 files changed, 116 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
>> index eb57e30..bb882ca 100644
>> --- a/include/trace/events/kmem.h
>> +++ b/include/trace/events/kmem.h
>> @@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
>>  		__entry->change_ownership)
>>  );
>>  
>> +TRACE_EVENT(mm_page_alloc_slowpath_enter,
>> +
>> +	TP_PROTO(int alloc_order,
>> +		nodemask_t *nodemask,
>> +		gfp_t gfp_flags),
>> +
>> +	TP_ARGS(alloc_order, nodemask, gfp_flags),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(int, alloc_order)
>> +		__field(nodemask_t *, nodemask)
>> +		__field(gfp_t, gfp_flags)
>> +	 ),
>> +
>> +	 TP_fast_assign(
>> +		__entry->alloc_order		= alloc_order;
>> +		__entry->nodemask		= nodemask;
>> +		__entry->gfp_flags		= gfp_flags;
>> +	 ),
>> +
>> +	 TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
>> +		__entry->alloc_order,
>> +		nodemask_pr_args(__entry->nodemask),
>> +		show_gfp_flags(__entry->gfp_flags))
>> +);
>> +
>> +TRACE_EVENT(mm_page_alloc_slowpath_exit,
>> +
>> +	TP_PROTO(struct page *page,
>> +		int alloc_order,
>> +		nodemask_t *nodemask,
>> +		u64 alloc_start,
>> +		gfp_t gfp_flags,
>> +		int retrys,
>> +		int exit),
>> +
>> +	TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
>> +		retrys, exit),
>> +
>> +	TP_STRUCT__entry(__field(struct page *, page)
>> +		__field(int, alloc_order)
>> +		__field(nodemask_t *, nodemask)
>> +		__field(u64, msdelay)
>> +		__field(gfp_t, gfp_flags)
>> +		__field(int, retrys)
>> +		__field(int, exit)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->page	     = page;
>> +		__entry->alloc_order = alloc_order;
>> +		__entry->nodemask    = nodemask;
>> +		__entry->msdelay     = jiffies_to_msecs(jiffies-alloc_start);
>> +		__entry->gfp_flags   = gfp_flags;
>> +		__entry->retrys	     = retrys;
>> +		__entry->exit	     = exit;
>> +	),
>> +
>> +	TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
>> +		__entry->page,
>> +		__entry->alloc_order,
>> +		nodemask_pr_args(__entry->nodemask),
>> +		__entry->msdelay,
>> +		show_gfp_flags(__entry->gfp_flags),
>> +		__entry->retrys,
>> +		__entry->exit)
>> +);
>> +
>>  #endif /* _TRACE_KMEM_H */
>>  
>>  /* This part must be outside protection */
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 48b5b01..bae9cb9 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>>  volatile unsigned long latent_entropy __latent_entropy;
>>  EXPORT_SYMBOL(latent_entropy);
>>  #endif
>> +enum slowpath_exit {
>> +	SLOWPATH_NOZONE = -16,
>> +	SLOWPATH_COMPACT_DEFERRED,
>> +	SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
>> +	SLOWPATH_RECURSION,
>> +	SLOWPATH_NO_RETRY,
>> +	SLOWPATH_COSTLY_ORDER,
>> +	SLOWPATH_OOM_VICTIM,
>> +	SLOWPATH_NO_DIRECT_RECLAIM,
>> +	SLOWPATH_ORDER
>> +};
>>  
>>  /*
>>   * Array of node states.
>> @@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	enum compact_result compact_result;
>>  	int compaction_retries;
>>  	int no_progress_loops;
>> +	unsigned long alloc_start = jiffies;
>>  	unsigned int cpuset_mems_cookie;
>>  	int reserve_flags;
>> +	enum slowpath_exit slowpath_exit;
>> +	int retry_count = 0;
>> +
>> +	trace_mm_page_alloc_slowpath_enter(order,
>> +		ac->nodemask,
>> +		gfp_mask);
>>  
>>  	/*
>>  	 * In the slowpath, we sanity check order to avoid ever trying to
>> @@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	 */
>>  	if (order >= MAX_ORDER) {
>>  		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
>> -		return NULL;
>> +		slowpath_exit = SLOWPATH_ORDER;
>> +		goto fail;
>>  	}
>>  
>>  	/*
>> @@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	 */
>>  	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
>>  					ac->high_zoneidx, ac->nodemask);
>> -	if (!ac->preferred_zoneref->zone)
>> +	if (!ac->preferred_zoneref->zone) {
>> +		slowpath_exit = SLOWPATH_NOZONE;
>>  		goto nopage;
>> +	}
>>  
>>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>>  		wake_all_kswapds(order, ac);
>> @@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  			 * system, so we fail the allocation instead of entering
>>  			 * direct reclaim.
>>  			 */
>> -			if (compact_result == COMPACT_DEFERRED)
>> +			if (compact_result == COMPACT_DEFERRED) {
>> +				slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
>>  				goto nopage;
>> +			}
>>  
>>  			/*
>>  			 * Looks like reclaim/compaction is worth trying, but
>> @@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	}
>>  
>>  retry:
>> +	retry_count++;
>>  	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
>>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>>  		wake_all_kswapds(order, ac);
>> @@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		goto got_pg;
>>  
>>  	/* Caller is not willing to reclaim, we can't balance anything */
>> -	if (!can_direct_reclaim)
>> +	if (!can_direct_reclaim) {
>> +		slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
>>  		goto nopage;
>> +	}
>>  
>>  	/* Avoid recursion of direct reclaim */
>> -	if (current->flags & PF_MEMALLOC)
>> +	if (current->flags & PF_MEMALLOC) {
>> +		slowpath_exit = SLOWPATH_RECURSION;
>>  		goto nopage;
>> -
>> +	}
>>  	/* Try direct reclaim and then allocating */
>>  	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
>>  							&did_some_progress);
>> @@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		goto got_pg;
>>  
>>  	/* Do not loop if specifically requested */
>> -	if (gfp_mask & __GFP_NORETRY)
>> +	if (gfp_mask & __GFP_NORETRY) {
>> +		slowpath_exit = SLOWPATH_NO_RETRY;
>>  		goto nopage;
>> -
>> +	}
>>  	/*
>>  	 * Do not retry costly high order allocations unless they are
>>  	 * __GFP_RETRY_MAYFAIL
>>  	 */
>> -	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
>> +	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
>> +		slowpath_exit = SLOWPATH_COSTLY_ORDER;
>>  		goto nopage;
>> -
>> +	}
>>  	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
>>  				 did_some_progress > 0, &no_progress_loops))
>>  		goto retry;
>> @@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	/* Avoid allocations with no watermarks from looping endlessly */
>>  	if (tsk_is_oom_victim(current) &&
>>  	    (alloc_flags == ALLOC_OOM ||
>> -	     (gfp_mask & __GFP_NOMEMALLOC)))
>> +	     (gfp_mask & __GFP_NOMEMALLOC))) {
>> +		slowpath_exit = SLOWPATH_OOM_VICTIM;
>>  		goto nopage;
>> -
>> +	}
>>  	/* Retry as long as the OOM killer is making progress */
>>  	if (did_some_progress) {
>>  		no_progress_loops = 0;
>> @@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		 * All existing users of the __GFP_NOFAIL are blockable, so warn
>>  		 * of any new users that actually require GFP_NOWAIT
>>  		 */
>> -		if (WARN_ON_ONCE(!can_direct_reclaim))
>> +		if (WARN_ON_ONCE(!can_direct_reclaim)) {
>> +			slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
>>  			goto fail;
>> -
>> +		}
>>  		/*
>>  		 * PF_MEMALLOC request from this context is rather bizarre
>>  		 * because we cannot reclaim anything and only can loop waiting
>> @@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	warn_alloc(gfp_mask, ac->nodemask,
>>  			"page allocation failure: order:%u", order);
>>  got_pg:
>> +	trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
>> +		alloc_start, gfp_mask, retry_count, slowpath_exit);
>> +
>>  	return page;
>>  }
>>  
>> -- 
>> 2.7.4
>>


* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 12:35     ` peter enderborg
@ 2017-11-23 12:47       ` Michal Hocko
  -1 siblings, 0 replies; 26+ messages in thread
From: Michal Hocko @ 2017-11-23 12:47 UTC (permalink / raw)
  To: peter enderborg
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Mel Gorman, Johannes Weiner, Pavel Tatashin

On Thu 23-11-17 13:35:15, peter enderborg wrote:
> Monitoring both enter/exit for all allocations and tracking down the ones
> that are slow would put a very big load on mobile or embedded devices,
> consuming a lot of battery and CPU. With this we can do useful monitoring
> on devices in our field tests with real usage.

This might be true but the other POV is that the trace point with the
additional information is just too disruptive to the rest of the code
and it exposes too many implementation details.
-- 
Michal Hocko
SUSE Labs


* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 10:43 ` peter.enderborg
@ 2017-11-23 13:00   ` Tetsuo Handa
  -1 siblings, 0 replies; 26+ messages in thread
From: Tetsuo Handa @ 2017-11-23 13:00 UTC (permalink / raw)
  To: peter.enderborg, Michal Hocko, linux-kernel, linux-mm
  Cc: Steven Rostedt, Ingo Molnar, Alex Deucher, David S . Miller,
	Harry Wentland, Greg Kroah-Hartman, Tony Cheng, Andrew Morton,
	Vlastimil Babka, Mel Gorman, Johannes Weiner, Pavel Tatashin

On 2017/11/23 19:43, peter.enderborg@sony.com wrote:
> The warning of slow allocation has been removed, this is
> a other way to fetch that information. But you need
> to enable the trace. The exit function also returns
> information about the number of retries, how long
> it was stalled and failure reason if that happened.

However, the fast path (I mean, get_page_from_freelist() at the
"/* First allocation attempt */" label) might be slow as well, for it
is allowed to call node_reclaim(), which can take an uncontrollably
long time. I think that you need to add hooks like
http://lkml.kernel.org/r/1510833448-19918-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp does. ;-)

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 12:47       ` Michal Hocko
@ 2017-11-23 13:03         ` peter enderborg
  -1 siblings, 0 replies; 26+ messages in thread
From: peter enderborg @ 2017-11-23 13:03 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Mel Gorman, Johannes Weiner, Pavel Tatashin

On 11/23/2017 01:47 PM, Michal Hocko wrote:
>
> This might be true but the other POV is that the trace point with the
> additional information is just too disruptive to the rest of the code
> and it exposes too many implementation details.

From whom do you want to hide details? Is this a security thing? I don't understand this argument. Tracefs is not part of uapi, right?

Hopefully there are not that many failures, and they might be very hard to reproduce if you don't know what you are looking for.

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 12:25   ` Michal Hocko
@ 2017-11-23 13:36     ` Mel Gorman
  -1 siblings, 0 replies; 26+ messages in thread
From: Mel Gorman @ 2017-11-23 13:36 UTC (permalink / raw)
  To: Michal Hocko
  Cc: peter.enderborg, linux-kernel, linux-mm, Steven Rostedt,
	Ingo Molnar, Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On Thu, Nov 23, 2017 at 01:25:30PM +0100, Michal Hocko wrote:
> On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
> > From: Peter Enderborg <peter.enderborg@sony.com>
> > 
> > The warning of slow allocation has been removed, this is
> > a other way to fetch that information. But you need
> > to enable the trace. The exit function also returns
> > information about the number of retries, how long
> > it was stalled and failure reason if that happened.
> 
> I think this is just too excessive. We already have a tracepoint for the
> allocation exit. All we need is an entry to have a base to compare with.
> Another usecase would be to measure allocation latency. Information you
> are adding can be (partially) covered by existing tracepoints.
> 

You can gather that by simply adding a probe to __alloc_pages_slowpath
(like what perf probe does) and matching the trigger with the existing
mm_page_alloc points. This is a bit approximate because you would need
to filter mm_page_alloc hits that do not have a corresponding hit with
__alloc_pages_slowpath but that is easy.

With that probe, it's trivial to use systemtap to track the latencies between
those points on a per-process basis and then only do a dump_stack from
systemtap for the ones that are above a particular threshold. This can all
be done without introducing state-tracking code into the page allocator
that is active regardless of whether the tracepoint is in use. It also
has the benefit of working with many older kernels.

If systemtap is not an option then use ftrace directly to gather the
information from userspace. It can be done via trace_pipe with some overhead
or on a per-cpu basis like what trace-cmd does. It's important to note
that even *if* the tracepoints were introduced, it would still be necessary
to have something gather the information and report it in a sensible fashion.
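As a rough illustration of what that gathering side could look like, the
untested sketch below pairs a probe hit on __alloc_pages_slowpath with
the next mm_page_alloc event for the same pid and prints the latency.
The trace_pipe path and the line format it parses are assumptions about
a typical setup, not anything taken from this patch:

/*
 * Untested sketch: pair a kprobe hit on __alloc_pages_slowpath with the
 * next mm_page_alloc event for the same pid and print the latency.
 * Assumes the default trace_pipe text layout ("comm-pid [cpu] flags
 * timestamp: event: ...") and comm names without spaces; lost events,
 * pid reuse and nested entries are simply ignored.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PID_SLOTS 32768

static double enter_ts[PID_SLOTS];	/* last slowpath entry, per pid */

int main(void)
{
	char line[1024], task[64];
	double ts;
	int cpu;
	FILE *tp = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");

	if (!tp) {
		perror("trace_pipe");
		return 1;
	}

	while (fgets(line, sizeof(line), tp)) {
		char *dash;
		int pid, slot;

		if (sscanf(line, " %63s [%d] %*s %lf:", task, &cpu, &ts) != 3)
			continue;
		dash = strrchr(task, '-');
		if (!dash)
			continue;
		pid = atoi(dash + 1);
		slot = pid % PID_SLOTS;

		if (strstr(line, "alloc_pages_slowpath")) {
			enter_ts[slot] = ts;	/* entered the slowpath */
		} else if (strstr(line, "mm_page_alloc:") && enter_ts[slot] != 0.0) {
			printf("pid %d: slowpath took ~%.6f s\n",
			       pid, ts - enter_ts[slot]);
			enter_ts[slot] = 0.0;
		}
	}
	return 0;
}

Thresholds, per-comm aggregation or stack dumps can then be layered on
top of that in userspace rather than in the allocator.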

That probe+mm_page_alloc can tell you the frequency of allocation
attempts that take a long time but not the why. Compaction and direct
reclaim latencies can be checked via existing tracepoints and in the case
of compaction, detailed information can also be gathered from existing
tracepoints. Detailed information on why direct reclaim stalled can be
harder but the biggest one is checking if reclaim stalls due to congestion
and again, tracepoints already exist for that.

I'm not convinced that a new tracepoint is needed.

-- 
Mel Gorman
SUSE Labs

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 13:36     ` Mel Gorman
@ 2017-11-23 13:43       ` Tetsuo Handa
  -1 siblings, 0 replies; 26+ messages in thread
From: Tetsuo Handa @ 2017-11-23 13:43 UTC (permalink / raw)
  To: Mel Gorman, Michal Hocko
  Cc: peter.enderborg, linux-kernel, linux-mm, Steven Rostedt,
	Ingo Molnar, Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On 2017/11/23 22:36, Mel Gorman wrote:
> On Thu, Nov 23, 2017 at 01:25:30PM +0100, Michal Hocko wrote:
>> On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
>>> From: Peter Enderborg <peter.enderborg@sony.com>
>>>
>>> The warning of slow allocation has been removed, this is
>>> a other way to fetch that information. But you need
>>> to enable the trace. The exit function also returns
>>> information about the number of retries, how long
>>> it was stalled and failure reason if that happened.
>>
>> I think this is just too excessive. We already have a tracepoint for the
>> allocation exit. All we need is an entry to have a base to compare with.
>> Another usecase would be to measure allocation latency. Information you
>> are adding can be (partially) covered by existing tracepoints.
>>
> 
> You can gather that by simply adding a probe to __alloc_pages_slowpath
> (like what perf probe does) and matching the trigger with the existing
> mm_page_alloc points. This is a bit approximate because you would need
> to filter mm_page_alloc hits that do not have a corresponding hit with
> __alloc_pages_slowpath but that is easy.
> 
> With that probe, it's trivial to use systemtap to track the latencies between
> those points on a per-processes basis and then only do a dump_stack from
> systemtap for the ones that are above a particular threshold. This can all
> be done without introducing state-tracking code into the page allocator
> that is active regardless of whether the tracepoint is in use. It also
> has the benefit of working with many older kernels.

Please see my attempt at
http://lkml.kernel.org/r/1510833448-19918-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp .
Printing just the current thread is not sufficient for me.

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 13:03         ` peter enderborg
@ 2017-11-23 13:47           ` Michal Hocko
  -1 siblings, 0 replies; 26+ messages in thread
From: Michal Hocko @ 2017-11-23 13:47 UTC (permalink / raw)
  To: peter enderborg
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Mel Gorman, Johannes Weiner, Pavel Tatashin

On Thu 23-11-17 14:03:04, peter enderborg wrote:
> On 11/23/2017 01:47 PM, Michal Hocko wrote:
> >
> > This might be true but the other POV is that the trace point with the
> > additional information is just too disruptive to the rest of the code
> > and it exposes too many implementation details.
> 
> From who do you want to hide details? Is this a security thing? I
> don't understand this  argument.  Tracefs is not part of uapi,
> right?

Linus would disagree https://lwn.net/Articles/737530/

-- 
Michal Hocko
SUSE Labs

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 13:36     ` Mel Gorman
@ 2017-11-23 14:01       ` Michal Hocko
  -1 siblings, 0 replies; 26+ messages in thread
From: Michal Hocko @ 2017-11-23 14:01 UTC (permalink / raw)
  To: Mel Gorman
  Cc: peter.enderborg, linux-kernel, linux-mm, Steven Rostedt,
	Ingo Molnar, Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On Thu 23-11-17 13:36:29, Mel Gorman wrote:
> On Thu, Nov 23, 2017 at 01:25:30PM +0100, Michal Hocko wrote:
> > On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
> > > From: Peter Enderborg <peter.enderborg@sony.com>
> > > 
> > > The warning of slow allocation has been removed, this is
> > > a other way to fetch that information. But you need
> > > to enable the trace. The exit function also returns
> > > information about the number of retries, how long
> > > it was stalled and failure reason if that happened.
> > 
> > I think this is just too excessive. We already have a tracepoint for the
> > allocation exit. All we need is an entry to have a base to compare with.
> > Another usecase would be to measure allocation latency. Information you
> > are adding can be (partially) covered by existing tracepoints.
> > 
> 
> You can gather that by simply adding a probe to __alloc_pages_slowpath
> (like what perf probe does) and matching the trigger with the existing
> mm_page_alloc points.

I am not sure adding a probe on a production system will fly in many
cases. A static tracepoint would be much easier in that case. But I
agree there are other means to accomplish the same thing. My main point
was to have an easy out-of-the-box way to check latencies. But that is
not something I would really insist on.

-- 
Michal Hocko
SUSE Labs

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 14:01       ` Michal Hocko
@ 2017-11-23 15:09         ` Mel Gorman
  -1 siblings, 0 replies; 26+ messages in thread
From: Mel Gorman @ 2017-11-23 15:09 UTC (permalink / raw)
  To: Michal Hocko
  Cc: peter.enderborg, linux-kernel, linux-mm, Steven Rostedt,
	Ingo Molnar, Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On Thu, Nov 23, 2017 at 03:01:27PM +0100, Michal Hocko wrote:
> On Thu 23-11-17 13:36:29, Mel Gorman wrote:
> > On Thu, Nov 23, 2017 at 01:25:30PM +0100, Michal Hocko wrote:
> > > On Thu 23-11-17 11:43:36, peter.enderborg@sony.com wrote:
> > > > From: Peter Enderborg <peter.enderborg@sony.com>
> > > > 
> > > > The warning of slow allocation has been removed, this is
> > > > a other way to fetch that information. But you need
> > > > to enable the trace. The exit function also returns
> > > > information about the number of retries, how long
> > > > it was stalled and failure reason if that happened.
> > > 
> > > I think this is just too excessive. We already have a tracepoint for the
> > > allocation exit. All we need is an entry to have a base to compare with.
> > > Another usecase would be to measure allocation latency. Information you
> > > are adding can be (partially) covered by existing tracepoints.
> > > 
> > 
> > You can gather that by simply adding a probe to __alloc_pages_slowpath
> > (like what perf probe does) and matching the trigger with the existing
> > mm_page_alloc points.
> 
> I am not sure adding a probe on a production system will fly in many
> cases.

Not sure why not, considering that the final mechanism is very similar.

> A static tracepoint would be much easier in that case.

Sure, but if it's only really latencies that are a concern then a probe
would do the job without backports. 

> But I
> agree there are other means to accomplish the same thing. My main point
> was to have an easy out-of-the-box way to check latencies. But that is
> not something I would really insist on.
> 

An entry tracepoint is enough for just latencies by punting the analysis
to userspace and the state tracking to either systemtap or userspace. If
it is userspace, then it would need to never malloc once the analysis
starts and to be mlocked.
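The preparation for that is small. An untested sketch of the "mlocked,
no malloc after start" part could be along these lines (the tracefs
path is an assumption):

/*
 * Untested sketch of the preparation: lock all current and future
 * mappings, use only a static buffer afterwards, and read trace_pipe
 * with raw read() so that stdio does not allocate behind our back.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	static char buf[1 << 16];	/* static, so covered by mlockall() */
	ssize_t n;
	int fd;

	if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
		perror("mlockall");
		return 1;
	}

	fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		write(STDOUT_FILENO, buf, n);	/* a real tool would analyse here */

	return 0;
}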

-- 
Mel Gorman
SUSE Labs

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 13:43       ` Tetsuo Handa
@ 2017-11-24  7:43         ` peter enderborg
  -1 siblings, 0 replies; 26+ messages in thread
From: peter enderborg @ 2017-11-24  7:43 UTC (permalink / raw)
  To: Tetsuo Handa, Mel Gorman, Michal Hocko
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On 11/23/2017 02:43 PM, Tetsuo Handa wrote:
> Please see my attempt at
> http://lkml.kernel.org/r/1510833448-19918-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp .
> Printing just current thread is not sufficient for me.
>
>
Seems to me that it is a lot more overhead, with timers and such.
My probe is about the health of the system, trying to capture who gets the penalty and how much. A slowpath allocation in an audio stream can cause drop-outs, and those are very hard to debug from userspace.

* Re: [PATCH] Add slowpath enter/exit trace events
  2017-11-23 14:01       ` Michal Hocko
@ 2017-11-24  8:38         ` peter enderborg
  -1 siblings, 0 replies; 26+ messages in thread
From: peter enderborg @ 2017-11-24  8:38 UTC (permalink / raw)
  To: Michal Hocko, Mel Gorman
  Cc: linux-kernel, linux-mm, Steven Rostedt, Ingo Molnar,
	Alex Deucher, David S . Miller, Harry Wentland,
	Greg Kroah-Hartman, Tony Cheng, Andrew Morton, Vlastimil Babka,
	Johannes Weiner, Pavel Tatashin

On 11/23/2017 03:01 PM, Michal Hocko wrote:
> I am not sure adding a probe on a production system will fly in many
> cases. A static tracepoint would be much easier in that case. But I
> agree there are other means to accomplish the same thing. My main point
> was to have an easy out-of-the-box way to check latencies. But that is
> not something I would really insist on.
>
In Android, tracefs (or part of it) is the way the system controls what developers can access on the Linux side of commercial devices. So it is very much used on production systems; it is even a requirement from Google for a device to be certified as Android. Things like dmesg are not. However, this probe is at the moment not in that scope.

My point is that you need to condense the information as much as possible, while keeping it useful, before making the effort to copy it to userspace. And for this, trace events are very useful on small systems, since the cost is very low for events where no one is listening.
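As a side note on that cost: tracepoints sit behind static keys, and a
trace_<event>_enabled() helper is generated for every event, so even the
argument preparation can be guarded. Purely as an illustrative fragment
(not the exact hunk from this patch; the variable names are assumed from
the slowpath):

	/*
	 * Illustrative fragment only: the check compiles down to a
	 * static-key branch, so it costs next to nothing while the
	 * event is disabled.
	 */
	if (trace_mm_page_alloc_slowpath_enter_enabled())
		trace_mm_page_alloc_slowpath_enter(order, ac->nodemask, gfp_mask);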
