From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751876AbcFINIV (ORCPT ); Thu, 9 Jun 2016 09:08:21 -0400 Received: from merlin.infradead.org ([205.233.59.134]:59337 "EHLO merlin.infradead.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750810AbcFINIU (ORCPT ); Thu, 9 Jun 2016 09:08:20 -0400 Date: Thu, 9 Jun 2016 15:07:50 +0200 From: Peter Zijlstra To: Yuyang Du Cc: Chris Wilson , Andrey Ryabinin , Linus Torvalds , Mike Galbraith , Thomas Gleixner , bsegall@google.com, morten.rasmussen@arm.com, pjt@google.com, steve.muckle@linaro.org, linux-kernel@vger.kernel.org Subject: Re: Divide-by-zero in post_init_entity_util_avg Message-ID: <20160609130750.GQ30909@twins.programming.kicks-ass.net> References: <20160609090142.GS32344@nuc-i3427.alporthouse.com> <20160609013324.GH8105@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20160609013324.GH8105@intel.com> User-Agent: Mutt/1.5.23.1 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Chris Wilson reported a divide by 0 at: post_init_entity_util_avg(): > 725 if (cfs_rq->avg.util_avg != 0) { > 726 sa->util_avg = cfs_rq->avg.util_avg * se->load.weight; > -> 727 sa->util_avg /= (cfs_rq->avg.load_avg + 1); > 728 > 729 if (sa->util_avg > cap) > 730 sa->util_avg = cap; > 731 } else { Which given the lack of serialization, and the code generated from update_cfs_rq_load_avg() is entirely possible. if (atomic_long_read(&cfs_rq->removed_load_avg)) { s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); sa->load_avg = max_t(long, sa->load_avg - r, 0); sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); removed_load = 1; } turns into: ffffffff81087064: 49 8b 85 98 00 00 00 mov 0x98(%r13),%rax ffffffff8108706b: 48 85 c0 test %rax,%rax ffffffff8108706e: 74 40 je ffffffff810870b0 ffffffff81087070: 4c 89 f8 mov %r15,%rax ffffffff81087073: 49 87 85 98 00 00 00 xchg %rax,0x98(%r13) ffffffff8108707a: 49 29 45 70 sub %rax,0x70(%r13) ffffffff8108707e: 4c 89 f9 mov %r15,%rcx ffffffff81087081: bb 01 00 00 00 mov $0x1,%ebx ffffffff81087086: 49 83 7d 70 00 cmpq $0x0,0x70(%r13) ffffffff8108708b: 49 0f 49 4d 70 cmovns 0x70(%r13),%rcx Which you'll note ends up with sa->load_avg -= r in memory at ffffffff8108707a. Ludicrous code generation if you ask me; I'd have expected something like (note, r15 holds 0): mov %r15, %rax xchg %rax, cfs_rq->removed_load_avg mov sa->load_avg, %rcx sub %rax, %rcx cmovs %r15, %rcx mov %rcx, sa->load_avg Adding the serialization (to _both_ call sites) should fix this. Fixes: 2b8c41daba32 ("sched/fair: Initiate a new task's util avg to a bounded value") Reported-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/core.c | 3 +-- kernel/sched/fair.c | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 385c947482e1..4aff10e3bd14 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2535,10 +2535,9 @@ void wake_up_new_task(struct task_struct *p) */ set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif - /* Post initialize new task's util average when its cfs_rq is set */ + rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); - rq = __task_rq_lock(p, &rf); activate_task(rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c6dd8bab010c..f379da14c7dd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8468,8 +8468,9 @@ void free_fair_sched_group(struct task_group *tg) int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { - struct cfs_rq *cfs_rq; struct sched_entity *se; + struct cfs_rq *cfs_rq; + struct rq *rq; int i; tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); @@ -8484,6 +8485,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) init_cfs_bandwidth(tg_cfs_bandwidth(tg)); for_each_possible_cpu(i) { + rq = cpu_rq(i); + cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); if (!cfs_rq) @@ -8497,7 +8500,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) init_cfs_rq(cfs_rq); init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); init_entity_runnable_average(se); + + raw_spin_lock_irq(&rq->lock); post_init_entity_util_avg(se); + raw_spin_unlock_irq(&rq->lock); } return 1;