linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Valentin Schneider <valentin.schneider@arm.com>
To: Vincent Guittot <vincent.guittot@linaro.org>,
	linux-kernel@vger.kernel.org, mingo@redhat.com,
	peterz@infradead.org
Cc: pauld@redhat.com, srikar@linux.vnet.ibm.com,
	quentin.perret@arm.com, dietmar.eggemann@arm.com,
	Morten.Rasmussen@arm.com, hdanton@sina.com, parth@linux.ibm.com,
	riel@surriel.com
Subject: Re: [PATCH v4 11/11] sched/fair: rework find_idlest_group
Date: Fri, 22 Nov 2019 14:34:05 +0000	[thread overview]
Message-ID: <5b4d204f-ce18-948a-416b-1920bcea7cf7@arm.com> (raw)
In-Reply-To: <1571405198-27570-12-git-send-email-vincent.guittot@linaro.org>

Hi Vincent,

Apologies for the delayed review on that one. I have a few comments inline,
otherwise for the misfit part, if at all still relevant:

Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>

On 18/10/2019 14:26, Vincent Guittot wrote:
>  static struct sched_group *
>  find_idlest_group(struct sched_domain *sd, struct task_struct *p,
> +		  int this_cpu, int sd_flag);
                                    ^^^^^^^
That parameter is now unused. AFAICT it was only used to special-case fork
events (sd flag & SD_BALANCE_FORK). I didn't see any explicit handling of
this case in the rework, I assume the new group type classification makes
it possible to forgo?

> @@ -8241,6 +8123,252 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
>  }
>  #endif /* CONFIG_NUMA_BALANCING */
>  
> +
> +struct sg_lb_stats;
> +
> +/*
> + * update_sg_wakeup_stats - Update sched_group's statistics for wakeup.
> + * @denv: The ched_domain level to look for idlest group.
> + * @group: sched_group whose statistics are to be updated.
> + * @sgs: variable to hold the statistics for this group.
> + */
> +static inline void update_sg_wakeup_stats(struct sched_domain *sd,
> +					  struct sched_group *group,
> +					  struct sg_lb_stats *sgs,
> +					  struct task_struct *p)
> +{
> +	int i, nr_running;
> +
> +	memset(sgs, 0, sizeof(*sgs));
> +
> +	for_each_cpu(i, sched_group_span(group)) {
> +		struct rq *rq = cpu_rq(i);
> +
> +		sgs->group_load += cpu_load(rq);
> +		sgs->group_util += cpu_util_without(i, p);
> +		sgs->sum_h_nr_running += rq->cfs.h_nr_running;
> +
> +		nr_running = rq->nr_running;
> +		sgs->sum_nr_running += nr_running;
> +
> +		/*
> +		 * No need to call idle_cpu() if nr_running is not 0
> +		 */
> +		if (!nr_running && idle_cpu(i))
> +			sgs->idle_cpus++;
> +
> +
> +	}
> +
> +	/* Check if task fits in the group */
> +	if (sd->flags & SD_ASYM_CPUCAPACITY &&
> +	    !task_fits_capacity(p, group->sgc->max_capacity)) {
> +		sgs->group_misfit_task_load = 1;
> +	}
> +
> +	sgs->group_capacity = group->sgc->capacity;
> +
> +	sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
> +
> +	/*
> +	 * Computing avg_load makes sense only when group is fully busy or
> +	 * overloaded
> +	 */
> +	if (sgs->group_type < group_fully_busy)
> +		sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
> +				sgs->group_capacity;
> +}
> +
> +static bool update_pick_idlest(struct sched_group *idlest,

Nit: could we name this update_sd_pick_idlest() to follow
update_sd_pick_busiest()? It's the kind of thing where if I typed
"update_sd" in gtags I'd like to see both listed, seeing as they are
*very* similar. And we already have update_sg_{wakeup, lb}_stats().

> +			       struct sg_lb_stats *idlest_sgs,
> +			       struct sched_group *group,
> +			       struct sg_lb_stats *sgs)
> +{
> +	if (sgs->group_type < idlest_sgs->group_type)
> +		return true;
> +
> +	if (sgs->group_type > idlest_sgs->group_type)
> +		return false;
> +
> +	/*
> +	 * The candidate and the current idles group are the same type of
> +	 * group. Let check which one is the idlest according to the type.
> +	 */
> +
> +	switch (sgs->group_type) {
> +	case group_overloaded:
> +	case group_fully_busy:
> +		/* Select the group with lowest avg_load. */
> +		if (idlest_sgs->avg_load <= sgs->avg_load)
> +			return false;
> +		break;
> +
> +	case group_imbalanced:
> +	case group_asym_packing:
> +		/* Those types are not used in the slow wakeup path */
> +		return false;
> +
> +	case group_misfit_task:
> +		/* Select group with the highest max capacity */
> +		if (idlest->sgc->max_capacity >= group->sgc->max_capacity)
> +			return false;
> +		break;
> +
> +	case group_has_spare:
> +		/* Select group with most idle CPUs */
> +		if (idlest_sgs->idle_cpus >= sgs->idle_cpus)
> +			return false;
> +		break;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * find_idlest_group finds and returns the least busy CPU group within the
> + * domain.
> + *
> + * Assumes p is allowed on at least one CPU in sd.
> + */
> +static struct sched_group *
> +find_idlest_group(struct sched_domain *sd, struct task_struct *p,
> +		  int this_cpu, int sd_flag)
> +{
> +	struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
> +	struct sg_lb_stats local_sgs, tmp_sgs;
> +	struct sg_lb_stats *sgs;
> +	unsigned long imbalance;
> +	struct sg_lb_stats idlest_sgs = {
> +			.avg_load = UINT_MAX,
> +			.group_type = group_overloaded,
> +	};
> +
> +	imbalance = scale_load_down(NICE_0_LOAD) *
> +				(sd->imbalance_pct-100) / 100;
> +
> +	do {
> +		int local_group;
> +
> +		/* Skip over this group if it has no CPUs allowed */
> +		if (!cpumask_intersects(sched_group_span(group),
> +					p->cpus_ptr))
> +			continue;
> +
> +		local_group = cpumask_test_cpu(this_cpu,
> +					       sched_group_span(group));
> +
> +		if (local_group) {
> +			sgs = &local_sgs;
> +			local = group;
> +		} else {
> +			sgs = &tmp_sgs;
> +		}
> +
> +		update_sg_wakeup_stats(sd, group, sgs, p);
> +
> +		if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
> +			idlest = group;
> +			idlest_sgs = *sgs;
> +		}
> +
> +	} while (group = group->next, group != sd->groups);
> +
> +
> +	/* There is no idlest group to push tasks to */
> +	if (!idlest)
> +		return NULL;
> +
> +	/*
> +	 * If the local group is idler than the selected idlest group
> +	 * don't try and push the task.
> +	 */
> +	if (local_sgs.group_type < idlest_sgs.group_type)
> +		return NULL;
> +
> +	/*
> +	 * If the local group is busier than the selected idlest group
> +	 * try and push the task.
> +	 */
> +	if (local_sgs.group_type > idlest_sgs.group_type)
> +		return idlest;
> +
> +	switch (local_sgs.group_type) {
> +	case group_overloaded:
> +	case group_fully_busy:
> +		/*
> +		 * When comparing groups across NUMA domains, it's possible for
> +		 * the local domain to be very lightly loaded relative to the
> +		 * remote domains but "imbalance" skews the comparison making
> +		 * remote CPUs look much more favourable. When considering
> +		 * cross-domain, add imbalance to the load on the remote node
> +		 * and consider staying local.
> +		 */
> +
> +		if ((sd->flags & SD_NUMA) &&
> +		    ((idlest_sgs.avg_load + imbalance) >= local_sgs.avg_load))
> +			return NULL;
> +
> +		/*
> +		 * If the local group is less loaded than the selected
> +		 * idlest group don't try and push any tasks.
> +		 */
> +		if (idlest_sgs.avg_load >= (local_sgs.avg_load + imbalance))
> +			return NULL;
> +
> +		if (100 * local_sgs.avg_load <= sd->imbalance_pct * idlest_sgs.avg_load)
> +			return NULL;
> +		break;
> +
> +	case group_imbalanced:
> +	case group_asym_packing:
> +		/* Those type are not used in the slow wakeup path */
> +		return NULL;

I suppose group_asym_packing could be handled similarly to misfit, right?
i.e. make the group type group_asym_packing if

  !sched_asym_prefer(sg.asym_prefer_cpu, local.asym_prefer_cpu)

> +
> +	case group_misfit_task:
> +		/* Select group with the highest max capacity */
> +		if (local->sgc->max_capacity >= idlest->sgc->max_capacity)
> +			return NULL;

Got confused a bit here due to the naming; in this case 'group_misfit_task'
only means 'if placed on this group, the task will be misfit'. If the
idlest group will cause us to remain misfit, but can give us some extra
capacity, I think it makes sense to move.

> +		break;
> +
> +	case group_has_spare:
> +		if (sd->flags & SD_NUMA) {
> +#ifdef CONFIG_NUMA_BALANCING
> +			int idlest_cpu;
> +			/*
> +			 * If there is spare capacity at NUMA, try to select
> +			 * the preferred node
> +			 */
> +			if (cpu_to_node(this_cpu) == p->numa_preferred_nid)
> +				return NULL;
> +
> +			idlest_cpu = cpumask_first(sched_group_span(idlest));
> +			if (cpu_to_node(idlest_cpu) == p->numa_preferred_nid)
> +				return idlest;
> +#endif
> +			/*
> +			 * Otherwise, keep the task on this node to stay close
> +			 * its wakeup source and improve locality. If there is
> +			 * a real need of migration, periodic load balance will
> +			 * take care of it.
> +			 */
> +			if (local_sgs.idle_cpus)
> +				return NULL;
> +		}
> +
> +		/*
> +		 * Select group with highest number of idle cpus. We could also
> +		 * compare the utilization which is more stable but it can end
> +		 * up that the group has less spare capacity but finally more
> +		 * idle cpus which means more opportunity to run task.
> +		 */
> +		if (local_sgs.idle_cpus >= idlest_sgs.idle_cpus)
> +			return NULL;
> +		break;
> +	}
> +
> +	return idlest;
> +}
> +
>  /**
>   * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
>   * @env: The load balancing environment.
> 

  parent reply	other threads:[~2019-11-22 14:34 UTC|newest]

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-18 13:26 [PATCH v4 00/10] sched/fair: rework the CFS load balance Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 01/11] sched/fair: clean up asym packing Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Clean " tip-bot2 for Vincent Guittot
2019-10-30 14:51   ` [PATCH v4 01/11] sched/fair: clean " Mel Gorman
2019-10-30 16:03     ` Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 02/11] sched/fair: rename sum_nr_running to sum_h_nr_running Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Rename sg_lb_stats::sum_nr_running " tip-bot2 for Vincent Guittot
2019-10-30 14:53   ` [PATCH v4 02/11] sched/fair: rename sum_nr_running " Mel Gorman
2019-10-18 13:26 ` [PATCH v4 03/11] sched/fair: remove meaningless imbalance calculation Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Remove " tip-bot2 for Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 04/11] sched/fair: rework load_balance Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Rework load_balance() tip-bot2 for Vincent Guittot
2019-10-30 15:45   ` [PATCH v4 04/11] sched/fair: rework load_balance Mel Gorman
2019-10-30 16:16     ` Valentin Schneider
2019-10-31  9:09     ` Vincent Guittot
2019-10-31 10:15       ` Mel Gorman
2019-10-31 11:13         ` Vincent Guittot
2019-10-31 11:40           ` Mel Gorman
2019-11-08 16:35             ` Vincent Guittot
2019-11-08 18:37               ` Mel Gorman
2019-11-12 10:58                 ` Vincent Guittot
2019-11-12 15:06                   ` Mel Gorman
2019-11-12 15:40                     ` Vincent Guittot
2019-11-12 17:45                       ` Mel Gorman
2019-11-18 13:50     ` Ingo Molnar
2019-11-18 13:57       ` Vincent Guittot
2019-11-18 14:51       ` Mel Gorman
2019-10-18 13:26 ` [PATCH v4 05/11] sched/fair: use rq->nr_running when balancing load Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Use " tip-bot2 for Vincent Guittot
2019-10-30 15:54   ` [PATCH v4 05/11] sched/fair: use " Mel Gorman
2019-10-18 13:26 ` [PATCH v4 06/11] sched/fair: use load instead of runnable load in load_balance Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Use load instead of runnable load in load_balance() tip-bot2 for Vincent Guittot
2019-10-30 15:58   ` [PATCH v4 06/11] sched/fair: use load instead of runnable load in load_balance Mel Gorman
2019-10-18 13:26 ` [PATCH v4 07/11] sched/fair: evenly spread tasks when not overloaded Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Spread out tasks evenly " tip-bot2 for Vincent Guittot
2019-10-30 16:03   ` [PATCH v4 07/11] sched/fair: evenly spread tasks " Mel Gorman
2019-10-18 13:26 ` [PATCH v4 08/11] sched/fair: use utilization to select misfit task Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Use " tip-bot2 for Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 09/11] sched/fair: use load instead of runnable load in wakeup path Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Use " tip-bot2 for Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 10/11] sched/fair: optimize find_idlest_group Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Optimize find_idlest_group() tip-bot2 for Vincent Guittot
2019-10-18 13:26 ` [PATCH v4 11/11] sched/fair: rework find_idlest_group Vincent Guittot
2019-10-21  9:12   ` [tip: sched/core] sched/fair: Rework find_idlest_group() tip-bot2 for Vincent Guittot
2019-10-22 16:46   ` [PATCH] sched/fair: fix rework of find_idlest_group() Vincent Guittot
2019-10-23  7:50     ` Chen, Rong A
2019-10-30 16:07     ` Mel Gorman
2019-11-18 17:42     ` [tip: sched/core] sched/fair: Fix " tip-bot2 for Vincent Guittot
2019-11-22 14:37     ` [PATCH] sched/fair: fix " Valentin Schneider
2019-11-25  9:16       ` Vincent Guittot
2019-11-25 11:03         ` Valentin Schneider
2019-11-20 11:58   ` [PATCH v4 11/11] sched/fair: rework find_idlest_group Qais Yousef
2019-11-20 13:21     ` Vincent Guittot
2019-11-20 16:53       ` Vincent Guittot
2019-11-20 17:34         ` Qais Yousef
2019-11-20 17:43           ` Vincent Guittot
2019-11-20 18:10             ` Qais Yousef
2019-11-20 18:20               ` Vincent Guittot
2019-11-20 18:27                 ` Qais Yousef
2019-11-20 19:28                   ` Vincent Guittot
2019-11-20 19:55                     ` Qais Yousef
2019-11-21 14:58                       ` Qais Yousef
2019-11-22 14:34   ` Valentin Schneider [this message]
2019-11-25  9:59     ` Vincent Guittot
2019-11-25 11:13       ` Valentin Schneider
2019-10-21  7:50 ` [PATCH v4 00/10] sched/fair: rework the CFS load balance Ingo Molnar
2019-10-21  8:44   ` Vincent Guittot
2019-10-21 12:56     ` Phil Auld
2019-10-24 12:38     ` Phil Auld
2019-10-24 13:46       ` Phil Auld
2019-10-24 14:59         ` Vincent Guittot
2019-10-25 13:33           ` Phil Auld
2019-10-28 13:03             ` Vincent Guittot
2019-10-30 14:39               ` Phil Auld
2019-10-30 16:24                 ` Dietmar Eggemann
2019-10-30 16:35                   ` Valentin Schneider
2019-10-30 17:19                     ` Phil Auld
2019-10-30 17:25                       ` Valentin Schneider
2019-10-30 17:29                         ` Phil Auld
2019-10-30 17:28                       ` Vincent Guittot
2019-10-30 17:44                         ` Phil Auld
2019-10-30 17:25                 ` Vincent Guittot
2019-10-31 13:57                   ` Phil Auld
2019-10-31 16:41                     ` Vincent Guittot
2019-10-30 16:24   ` Mel Gorman
2019-10-30 16:35     ` Vincent Guittot
2019-11-18 13:15     ` Ingo Molnar
2019-11-25 12:48 ` Valentin Schneider
2020-01-03 16:39   ` Valentin Schneider

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5b4d204f-ce18-948a-416b-1920bcea7cf7@arm.com \
    --to=valentin.schneider@arm.com \
    --cc=Morten.Rasmussen@arm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hdanton@sina.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=parth@linux.ibm.com \
    --cc=pauld@redhat.com \
    --cc=peterz@infradead.org \
    --cc=quentin.perret@arm.com \
    --cc=riel@surriel.com \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).