linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: Mel Gorman <mgorman@techsingularity.net>
Cc: Vincent Guittot <vincent.guittot@linaro.org>,
	Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Ben Segall <bsegall@google.com>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Phil Auld <pauld@redhat.com>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 05/11] sched/numa: Distinguish between the different task_numa_migrate failure cases
Date: Wed, 12 Feb 2020 09:43:08 -0500	[thread overview]
Message-ID: <20200212094308.04bcf8a2@gandalf.local.home> (raw)
In-Reply-To: <20200212093654.4816-6-mgorman@techsingularity.net>

On Wed, 12 Feb 2020 09:36:48 +0000
Mel Gorman <mgorman@techsingularity.net> wrote:

> sched:sched_stick_numa is meant to fire when a task is unable to migrate
> to the preferred node but from the trace, it's possibile to tell the
> difference between "no CPU found", "migration to idle CPU failed" and
> "tasks could not be swapped". Extend the tracepoint accordingly.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
> ---
>  include/trace/events/sched.h | 51 +++++++++++++++++++++++++++++++++-----------
>  kernel/sched/fair.c          |  6 +++---
>  2 files changed, 42 insertions(+), 15 deletions(-)
> 
> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
> index 420e80e56e55..3d07c0af4ab8 100644
> --- a/include/trace/events/sched.h
> +++ b/include/trace/events/sched.h
> @@ -487,7 +487,11 @@ TRACE_EVENT(sched_process_hang,
>  );
>  #endif /* CONFIG_DETECT_HUNG_TASK */
>  
> -DECLARE_EVENT_CLASS(sched_move_task_template,
> +/*
> + * Tracks migration of tasks from one runqueue to another. Can be used to
> + * detect if automatic NUMA balancing is bouncing between nodes
> + */
> +TRACE_EVENT(sched_move_numa,
>  
>  	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
>  
> @@ -519,20 +523,43 @@ DECLARE_EVENT_CLASS(sched_move_task_template,
>  			__entry->dst_cpu, __entry->dst_nid)
>  );
>  
> -/*
> - * Tracks migration of tasks from one runqueue to another. Can be used to
> - * detect if automatic NUMA balancing is bouncing between nodes
> - */
> -DEFINE_EVENT(sched_move_task_template, sched_move_numa,
> -	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
> +TRACE_EVENT(sched_stick_numa,
>  
> -	TP_ARGS(tsk, src_cpu, dst_cpu)
> -);
> +	TP_PROTO(struct task_struct *src_tsk, int src_cpu, struct task_struct *dst_tsk, int dst_cpu),
>  
> -DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
> -	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
> +	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
>  
> -	TP_ARGS(tsk, src_cpu, dst_cpu)
> +	TP_STRUCT__entry(
> +		__field( pid_t,	src_pid			)
> +		__field( pid_t,	src_tgid		)
> +		__field( pid_t,	src_ngid		)
> +		__field( int,	src_cpu			)
> +		__field( int,	src_nid			)
> +		__field( pid_t,	dst_pid			)
> +		__field( pid_t,	dst_tgid		)
> +		__field( pid_t,	dst_ngid		)
> +		__field( int,	dst_cpu			)
> +		__field( int,	dst_nid			)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->src_pid	= task_pid_nr(src_tsk);
> +		__entry->src_tgid	= task_tgid_nr(src_tsk);
> +		__entry->src_ngid	= task_numa_group_id(src_tsk);
> +		__entry->src_cpu	= src_cpu;
> +		__entry->src_nid	= cpu_to_node(src_cpu);
> +		__entry->dst_pid	= dst_tsk ? task_pid_nr(dst_tsk) : 0;
> +		__entry->dst_tgid	= dst_tsk ? task_tgid_nr(dst_tsk) : 0;
> +		__entry->dst_ngid	= dst_tsk ? task_numa_group_id(dst_tsk) : 0;
> +		__entry->dst_cpu	= dst_cpu;
> +		__entry->dst_nid	= dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1;
> +	),
> +
> +	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
> +			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
> +			__entry->src_cpu, __entry->src_nid,
> +			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
> +			__entry->dst_cpu, __entry->dst_nid)
>  );
>  

The above looks the same as the below sched_swap_numa. Can you make a
DECLARE_EVENT_CLASS() and merge the two for sched_swap_numa?

Note, most the footprint of a trace event happens in the
DECLARE_EVENT_CLASS() (a TRACE_EVENT() is just a DECLARE_EVENT_CLASS()
and DEFINE_EVENT() put together). The more DECLARE_EVENT_CLASS()s you
can share, the less the footprint is.

-- Steve


>  TRACE_EVENT(sched_swap_numa,
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 4ab18fba5b82..6005ce28033b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1849,7 +1849,7 @@ static int task_numa_migrate(struct task_struct *p)
>  
>  	/* No better CPU than the current one was found. */
>  	if (env.best_cpu == -1) {
> -		trace_sched_stick_numa(p, env.src_cpu, -1);
> +		trace_sched_stick_numa(p, env.src_cpu, NULL, -1);
>  		return -EAGAIN;
>  	}
>  
> @@ -1858,7 +1858,7 @@ static int task_numa_migrate(struct task_struct *p)
>  		ret = migrate_task_to(p, env.best_cpu);
>  		WRITE_ONCE(best_rq->numa_migrate_on, 0);
>  		if (ret != 0)
> -			trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
> +			trace_sched_stick_numa(p, env.src_cpu, NULL, env.best_cpu);
>  		return ret;
>  	}
>  
> @@ -1866,7 +1866,7 @@ static int task_numa_migrate(struct task_struct *p)
>  	WRITE_ONCE(best_rq->numa_migrate_on, 0);
>  
>  	if (ret != 0)
> -		trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
> +		trace_sched_stick_numa(p, env.src_cpu, env.best_task, env.best_cpu);
>  	put_task_struct(env.best_task);
>  	return ret;
>  }


  reply	other threads:[~2020-02-12 14:43 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12  9:36 [RFC PATCH 00/11] Reconcile NUMA balancing decisions with the load balancer Mel Gorman
2020-02-12  9:36 ` [PATCH 01/11] sched/fair: Allow a small load imbalance between low utilisation SD_NUMA domains Mel Gorman
2020-02-12  9:36 ` [PATCH 02/11] sched/fair: Optimize select_idle_core() Mel Gorman
2020-02-12  9:36 ` [PATCH 03/11] sched/fair: Allow a per-CPU kthread waking a task to stack on the same CPU, to fix XFS performance regression Mel Gorman
2020-02-12  9:36 ` [PATCH 04/11] sched/numa: Trace when no candidate CPU was found on the preferred node Mel Gorman
2020-02-12  9:36 ` [PATCH 05/11] sched/numa: Distinguish between the different task_numa_migrate failure cases Mel Gorman
2020-02-12 14:43   ` Steven Rostedt [this message]
2020-02-12 15:59     ` Mel Gorman
2020-02-12  9:36 ` [PATCH 06/11] sched/numa: Prefer using an idle cpu as a migration target instead of comparing tasks Mel Gorman
2020-02-12  9:36 ` [PATCH 07/11] sched/numa: Find an alternative idle CPU if the CPU is part of an active NUMA balance Mel Gorman
2020-02-12  9:36 ` [PATCH 08/11] sched/numa: Bias swapping tasks based on their preferred node Mel Gorman
2020-02-13 10:31   ` Peter Zijlstra
2020-02-13 11:18     ` Mel Gorman
2020-02-12 13:22 ` [RFC PATCH 00/11] Reconcile NUMA balancing decisions with the load balancer Vincent Guittot
2020-02-12 14:07   ` Valentin Schneider
2020-02-12 15:48   ` Mel Gorman
2020-02-12 16:13     ` Vincent Guittot
2020-02-12 15:45 ` [PATCH 09/11] sched/fair: Split out helper to adjust imbalances between domains Mel Gorman
2020-02-12 15:46 ` [PATCH 10/11] sched/numa: Use similar logic to the load balancer for moving between domains with spare capacity Mel Gorman
2020-02-12 15:46 ` [PATCH 11/11] sched/numa: Use similar logic to the load balancer for moving between overloaded domains Mel Gorman
     [not found] ` <20200214041232.18904-1-hdanton@sina.com>
2020-02-14  7:50   ` [PATCH 08/11] sched/numa: Bias swapping tasks based on their preferred node Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200212094308.04bcf8a2@gandalf.local.home \
    --to=rostedt@goodmis.org \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@kernel.org \
    --cc=pauld@redhat.com \
    --cc=peterz@infradead.org \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).