From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752734AbdEDU33 (ORCPT );
	Thu, 4 May 2017 16:29:29 -0400
Received: from mail-yb0-f194.google.com ([209.85.213.194]:33400 "EHLO
	mail-yb0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1751214AbdEDU31 (ORCPT );
	Thu, 4 May 2017 16:29:27 -0400
Date: Thu, 4 May 2017 16:29:25 -0400
From: Tejun Heo
To: Ingo Molnar, Peter Zijlstra
Cc: linux-kernel@vger.kernel.org, Linus Torvalds, Vincent Guittot,
	Mike Galbraith, Paul Turner, Chris Mason, kernel-team@fb.com
Subject: [PATCH 1/3] sched/fair: Peter's shares_type patch
Message-ID: <20170504202925.GB2647@htj.duckdns.org>
References: <20170504202838.GA2647@htj.duckdns.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <20170504202838.GA2647@htj.duckdns.org>
User-Agent: Mutt/1.8.0 (2017-02-23)
Sender: linux-kernel-owner@vger.kernel.org
List-ID:
X-Mailing-List: linux-kernel@vger.kernel.org

From: Peter Zijlstra

This patch is a combination of

  http://lkml.kernel.org/r/20170502081905.GA4626@worktop.programming.kicks-ass.net
  +
  http://lkml.kernel.org/r/20170502083009.GA3377@worktop.programming.kicks-ass.net
  +
  a build fix & the use of shares_avg instead of shares_runnable for
  propagating load_avg.

This fixes the propagation problem described in the message below while
keeping the group se->avg.load_avg in line with the matching
cfs_rq->avg.load_avg.

  http://lkml.kernel.org/r/20170424201415.GB14169@wtj.duckdns.org

---
 kernel/sched/fair.c | 98 +++++++++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 50 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2636,26 +2636,57 @@ account_entity_dequeue(struct cfs_rq *cf
 	cfs_rq->nr_running--;
 }
 
+enum shares_type {
+	shares_runnable,
+	shares_avg,
+	shares_weight,
+};
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 # ifdef CONFIG_SMP
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
+static long
+calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
+		enum shares_type shares_type)
 {
-	long tg_weight, load, shares;
+	long tg_weight, tg_shares, load, shares;
 
-	/*
-	 * This really should be: cfs_rq->avg.load_avg, but instead we use
-	 * cfs_rq->load.weight, which is its upper bound. This helps ramp up
-	 * the shares for small weight interactive tasks.
-	 */
-	load = scale_load_down(cfs_rq->load.weight);
+	tg_shares = READ_ONCE(tg->shares);
+
+	switch (shares_type) {
+	case shares_runnable:
+		/*
+		 * Instead of the correct cfs_rq->avg.load_avg we use
+		 * cfs_rq->runnable_load_avg, which does not include the
+		 * blocked load.
+		 */
+		load = cfs_rq->runnable_load_avg;
+		break;
+
+	case shares_avg:
+		load = cfs_rq->avg.load_avg;
+		break;
+
+	case shares_weight:
+		/*
+		 * Instead of the correct cfs_rq->avg.load_avg we use
+		 * cfs_rq->load.weight, which is its upper bound. This helps
+		 * ramp up the shares for small weight interactive tasks.
+		 */
+		load = scale_load_down(cfs_rq->load.weight);
+		break;
+	}
 
 	tg_weight = atomic_long_read(&tg->load_avg);
 
-	/* Ensure tg_weight >= load */
+	/*
+	 * This ensures the sum is up-to-date for this CPU, in case of the other
+	 * two approximations it biases the sum towards their value and in case
+	 * of (near) UP ensures the division ends up <= 1.
+	 */
 	tg_weight -= cfs_rq->tg_load_avg_contrib;
 	tg_weight += load;
 
-	shares = (tg->shares * load);
+	shares = (tg_shares * load);
 	if (tg_weight)
 		shares /= tg_weight;
 
@@ -2671,15 +2702,11 @@ static long calc_cfs_shares(struct cfs_r
 	 * case no task is runnable on a CPU MIN_SHARES=2 should be returned
 	 * instead of 0.
 	 */
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	if (shares > tg->shares)
-		shares = tg->shares;
-
-	return shares;
+	return clamp_t(long, shares, MIN_SHARES, tg_shares);
 }
 # else /* CONFIG_SMP */
-static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
+static inline long
+calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, enum shares_type shares_type)
 {
 	return tg->shares;
 }
@@ -2721,7 +2748,7 @@ static void update_cfs_shares(struct sch
 	if (likely(se->load.weight == tg->shares))
 		return;
 #endif
-	shares = calc_cfs_shares(cfs_rq, tg);
+	shares = calc_cfs_shares(cfs_rq, tg, shares_weight);
 
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
@@ -3078,39 +3105,10 @@ static inline void
 update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
-	long delta, load = gcfs_rq->avg.load_avg;
-
-	/*
-	 * If the load of group cfs_rq is null, the load of the
-	 * sched_entity will also be null so we can skip the formula
-	 */
-	if (load) {
-		long tg_load;
-
-		/* Get tg's load and ensure tg_load > 0 */
-		tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
-
-		/* Ensure tg_load >= load and updated with current load*/
-		tg_load -= gcfs_rq->tg_load_avg_contrib;
-		tg_load += load;
-
-		/*
-		 * We need to compute a correction term in the case that the
-		 * task group is consuming more CPU than a task of equal
-		 * weight. A task with a weight equals to tg->shares will have
-		 * a load less or equal to scale_load_down(tg->shares).
-		 * Similarly, the sched_entities that represent the task group
-		 * at parent level, can't have a load higher than
-		 * scale_load_down(tg->shares). And the Sum of sched_entities'
-		 * load must be <= scale_load_down(tg->shares).
-		 */
-		if (tg_load > scale_load_down(gcfs_rq->tg->shares)) {
-			/* scale gcfs_rq's load into tg's shares*/
-			load *= scale_load_down(gcfs_rq->tg->shares);
-			load /= tg_load;
-		}
-	}
+	long load, delta;
+	load = scale_load_down(calc_cfs_shares(gcfs_rq, gcfs_rq->tg,
+					       shares_avg));
 
 	delta = load - se->avg.load_avg;
 
 	/* Nothing to update */
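
A note for readers following along outside the kernel tree: all three
shares_type variants above feed the same arithmetic, namely tg->shares scaled
by this CPU's portion of the group-wide load sum and then clamped to
[MIN_SHARES, tg->shares]; only the "load" estimate differs. Below is a minimal
standalone C sketch of that formula. The names sample_group, sample_cfs_rq and
approx_group_shares, and the numbers in main(), are hypothetical and exist
only for illustration; they are not part of this patch or of the kernel.

/*
 * Standalone illustration (not kernel code) of the shares formula used by
 * the new calc_cfs_shares():
 *
 *	shares = tg->shares * load / (tg_load_avg - tg_load_avg_contrib + load)
 *
 * clamped to [MIN_SHARES, tg->shares].  "load" is whichever estimate the
 * caller selected via enum shares_type.  Everything below other than the
 * formula itself is simplified and hypothetical.
 */
#include <stdio.h>

#define MIN_SHARES	2L	/* same floor the kernel applies */

struct sample_group {
	long shares;		/* tg->shares */
	long tg_load_avg;	/* sum of every CPU's published contribution */
};

struct sample_cfs_rq {
	long load;			/* the per-CPU load estimate */
	long tg_load_avg_contrib;	/* this CPU's last published contribution */
};

static long approx_group_shares(const struct sample_cfs_rq *cfs_rq,
				const struct sample_group *tg)
{
	long tg_weight, shares;

	/* Refresh the group-wide sum with this CPU's current load. */
	tg_weight = tg->tg_load_avg;
	tg_weight -= cfs_rq->tg_load_avg_contrib;
	tg_weight += cfs_rq->load;

	shares = tg->shares * cfs_rq->load;
	if (tg_weight)
		shares /= tg_weight;

	/* Never below MIN_SHARES, never above the group's configured shares. */
	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > tg->shares)
		shares = tg->shares;
	return shares;
}

int main(void)
{
	/* Hypothetical numbers: a group with shares=1024 spread over two CPUs. */
	struct sample_group tg = { .shares = 1024, .tg_load_avg = 800 };
	struct sample_cfs_rq cpu0 = { .load = 600, .tg_load_avg_contrib = 500 };
	struct sample_cfs_rq cpu1 = { .load = 300, .tg_load_avg_contrib = 300 };

	printf("cpu0 shares: %ld\n", approx_group_shares(&cpu0, &tg));	/* 682 */
	printf("cpu1 shares: %ld\n", approx_group_shares(&cpu1, &tg));	/* 384 */
	return 0;
}

With shares_avg, update_tg_cfs_load() above propagates this clamped value
computed from cfs_rq->avg.load_avg, which is what keeps the group se's
load_avg in line with its cfs_rq.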