All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
To: Abel Wu <wuyun.abel@bytedance.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Mel Gorman <mgorman@suse.de>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Josh Don <joshdon@google.com>, Chen Yu <yu.c.chen@intel.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	K Prateek Nayak <kprateek.nayak@amd.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 6/7] sched/fair: skip busy cores in SIS search
Date: Wed, 20 Jul 2022 21:46:05 +0530	[thread overview]
Message-ID: <YtgqRVDIGqCNJAZ6@BLR-5CG11610CF.amd.com> (raw)
In-Reply-To: <20220619120451.95251-7-wuyun.abel@bytedance.com>

Hello Abel,


On Sun, Jun 19, 2022 at 08:04:50PM +0800, Abel Wu wrote:

[..snip..]

>  
> +static void sd_update_icpus(int core, int icpu)

How about update_llc_icpus() ?

> +{
> +	struct sched_domain_shared *sds;
> +	struct cpumask *icpus;
> +
> +	sds = rcu_dereference(per_cpu(sd_llc_shared, core));
> +	if (!sds)
> +		return;
> +
> +	icpus = sched_domain_icpus(sds);
> +
> +	/*
> +	 * XXX: The update is racy between different cores.
> +	 * The non-atomic ops here is a tradeoff of accuracy
> +	 * for easing the cache traffic.
> +	 */
> +	if (icpu == -1)
> +		cpumask_andnot(icpus, icpus, cpu_smt_mask(core));
> +	else if (!cpumask_test_cpu(icpu, icpus))
> +		__cpumask_set_cpu(icpu, icpus);
> +}
> +
>  /*
>   * Scans the local SMT mask to see if the entire core is idle, and records this
>   * information in sd_llc_shared->has_idle_cores.
> @@ -6340,6 +6362,10 @@ static inline bool test_idle_cpus(int cpu)
>  	return true;
>  }
>  
> +static inline void sd_update_icpus(int core, int icpu)
> +{
> +}
> +
>  static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
>  {
>  	return __select_idle_cpu(core, p);
> @@ -6370,7 +6396,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
>  	if (!this_sd)
>  		return -1;
>  
> -	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +	cpumask_and(cpus, has_idle_core ? sched_domain_span(sd) :
> +		    sched_domain_icpus(sd->shared), p->cpus_ptr);

With this we get an idea of the likely idle CPUs. However, we may
still want SIS_UTIL on top of this, as it determines the number of idle
CPUs to scan based on the utilization average. That will iron out any
transient idle CPUs which may feature in
sched_domain_icpus(sd->shared) but are not likely to remain idle. Is
this understanding correct?


>  
>  	if (sched_feat(SIS_PROP) && !has_idle_core) {
>  		u64 avg_cost, avg_idle, span_avg;
> @@ -8342,6 +8369,7 @@ struct sd_lb_stats {
>  	unsigned int prefer_sibling; /* tasks should go to sibling first */
>  
>  	int sd_state;
> +	int idle_cpu;
>  
>  	struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
>  	struct sg_lb_stats local_stat;	/* Statistics of the local group */
> @@ -8362,6 +8390,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
>  		.total_load = 0UL,
>  		.total_capacity = 0UL,
>  		.sd_state = sd_is_busy,
> +		.idle_cpu = -1,
>  		.busiest_stat = {
>  			.idle_cpus = UINT_MAX,
>  			.group_type = group_has_spare,
> @@ -8702,10 +8731,18 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs
>  	return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
>  }
>  
> -static inline void sd_classify(struct sd_lb_stats *sds, struct rq *rq)
> +static inline void sd_classify(struct sd_lb_stats *sds, struct rq *rq, int cpu)
>  {
> -	if (sds->sd_state != sd_has_icpus && unoccupied_rq(rq))
> +	if (sds->sd_state != sd_has_icpus && unoccupied_rq(rq)) {
> +		/*
> +		 * Prefer idle cpus than unoccupied ones. This
> +		 * is achieved by only allowing the idle ones
> +		 * unconditionally overwrite the preious record
                                                 ^^^^^^^^
Nit:						 previous


> +		 * while the occupied ones can't.
> +		 */

This if block is only entered when we encounter the very first
unoccupied cpu in the SMT domain. So why do we need this comment here
about preferring idle cpus over unoccupied ones?


> +		sds->idle_cpu = cpu;
>  		sds->sd_state = sd_has_icpus;
> +	}
>  }
>  
>  /**
> @@ -8741,7 +8778,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  		sgs->sum_nr_running += nr_running;
>  
>  		if (update_core)
> -			sd_classify(sds, rq);
> +			sd_classify(sds, rq, i);
>  
>  		if (nr_running > 1)
>  			*sg_status |= SG_OVERLOAD;
> @@ -8757,7 +8794,16 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  		 * No need to call idle_cpu() if nr_running is not 0
>  		 */
>  		if (!nr_running && idle_cpu(i)) {
> +			/*
> +			 * Prefer the last idle cpu by overwriting
> +			 * preious one. The first idle cpu in this
                           ^^^^^^^
Nit:			   previous

> +			 * domain (if any) can trigger balancing
> +			 * and fed with tasks, so we'd better choose
> +			 * a candidate in an opposite way.
> +			 */

This is a better place to call out the fact that an idle cpu is
preferable to an unoccupied cpu.

> +			sds->idle_cpu = i;
>  			sgs->idle_cpus++;
> +
>  			/* Idle cpu can't have misfit task */
>  			continue;
>  		}
> @@ -9273,8 +9319,40 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>  
>  static void sd_update_state(struct lb_env *env, struct sd_lb_stats *sds)
>  {
> -	if (sds->sd_state == sd_has_icpus && !test_idle_cpus(env->dst_cpu))
> -		set_idle_cpus(env->dst_cpu, true);
> +	struct sched_domain_shared *sd_smt_shared = env->sd->shared;
> +	enum sd_state new = sds->sd_state;
> +	int this = env->dst_cpu;
> +
> +	/*
> +	 * Parallel updating can hardly contribute accuracy to
> +	 * the filter, besides it can be one of the burdens on
> +	 * cache traffic.
> +	 */
> +	if (cmpxchg(&sd_smt_shared->updating, 0, 1))
> +		return;
> +
> +	/*
> +	 * There is at least one unoccupied cpu available, so
> +	 * propagate it to the filter to avoid false negative
> +	 * issue which could result in lost tracking of some
> +	 * idle cpus thus throughupt downgraded.
> +	 */
> +	if (new != sd_is_busy) {
> +		if (!test_idle_cpus(this))
> +			set_idle_cpus(this, true);
> +	} else {
> +		/*
> +		 * Nothing changes so nothing to update or
> +		 * propagate.
> +		 */
> +		if (sd_smt_shared->state == sd_is_busy)
> +			goto out;


The main use of sd_smt_shared->state is to detect the transition
from sd_has_icpus --> sd_is_busy. During that transition
sds->idle_cpu == -1, which ensures that sd_update_icpus() below clears
this core's CPUs from the LLC's icpus mask. Calling this out may be
more useful than the comment above.

 
> +	}
> +
> +	sd_update_icpus(this, sds->idle_cpu);
> +	sd_smt_shared->state = new;
> +out:
> +	xchg(&sd_smt_shared->updating, 0);
>  }


--
Thanks and Regards
gautham.

  parent reply	other threads:[~2022-07-20 16:16 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-19 12:04 [PATCH v4 0/7] sched/fair: improve scan efficiency of SIS Abel Wu
2022-06-19 12:04 ` [PATCH v4 1/7] sched/fair: default to false in test_idle_cores Abel Wu
2022-06-27 22:53   ` Josh Don
2022-06-28  3:39     ` Abel Wu
2022-06-19 12:04 ` [PATCH v4 2/7] sched/fair: remove redundant check in select_idle_smt Abel Wu
2022-06-27 23:17   ` Josh Don
2022-06-19 12:04 ` [PATCH v4 3/7] sched/fair: avoid double search on same cpu Abel Wu
2022-06-27 23:24   ` Josh Don
2022-06-19 12:04 ` [PATCH v4 4/7] sched/fair: remove useless check in select_idle_core Abel Wu
2022-06-27 23:42   ` Josh Don
2022-06-28  3:51     ` Abel Wu
2022-06-29  0:41       ` Josh Don
2022-06-19 12:04 ` [PATCH v4 5/7] sched/fair: skip SIS domain search if fully busy Abel Wu
2022-06-28  0:28   ` Josh Don
2022-06-28  6:53     ` Abel Wu
2022-06-29  1:11       ` Josh Don
2022-06-29  7:05         ` Abel Wu
2022-07-20 15:34   ` Gautham R. Shenoy
2022-08-15  9:49     ` Abel Wu
2022-06-19 12:04 ` [PATCH v4 6/7] sched/fair: skip busy cores in SIS search Abel Wu
2022-06-21 18:14   ` Chen Yu
2022-06-22  3:52     ` [External] " Abel Wu
2022-06-24  3:30       ` Chen Yu
2022-06-27 10:13         ` Abel Wu
2022-06-28  7:58           ` Abel Wu
2022-06-30  4:16             ` Chen Yu
2022-06-30 10:46               ` Abel Wu
2022-07-09  8:55                 ` Chen Yu
2022-07-09 15:56                   ` Abel Wu
2022-07-11 12:02                     ` Chen Yu
2022-07-13 10:25                       ` Abel Wu
2022-06-22  4:03     ` Abel Wu
2022-07-20 16:16   ` Gautham R. Shenoy [this message]
2022-08-15  9:49     ` Abel Wu
2022-06-19 12:04 ` [PATCH v4 7/7] sched/fair: de-entropy for SIS filter Abel Wu
2022-06-21 18:23   ` Chen Yu
2022-06-22  4:01     ` Abel Wu
2022-06-30  7:46   ` Abel Wu
2022-07-09 14:42   ` [sched/fair] 32fe13cd7a: phoronix-test-suite.fio.SequentialWrite.IO_uring.Yes.No.4KB.DefaultTestDirectory.mb_s -11.7% regression kernel test robot
2022-07-09 14:42     ` kernel test robot
2022-07-09 16:14     ` Abel Wu
2022-07-09 16:14       ` Abel Wu
2022-07-20 17:08   ` [PATCH v4 7/7] sched/fair: de-entropy for SIS filter Gautham R. Shenoy
2022-08-15  9:49     ` Abel Wu
2022-07-06  9:51 ` [PATCH v4 0/7] sched/fair: improve scan efficiency of SIS Abel Wu
2022-07-18 11:00 ` K Prateek Nayak
2022-08-15 13:59   ` Abel Wu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YtgqRVDIGqCNJAZ6@BLR-5CG11610CF.amd.com \
    --to=gautham.shenoy@amd.com \
    --cc=joshdon@google.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=peterz@infradead.org \
    --cc=tim.c.chen@linux.intel.com \
    --cc=vincent.guittot@linaro.org \
    --cc=wuyun.abel@bytedance.com \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.