Date: Wed, 27 Sep 2017 18:40:26 +0200
From: Peter Zijlstra
To: Anna-Maria Gleixner
Cc: LKML, Ingo Molnar, Christoph Hellwig, keescook@chromium.org,
	John Stultz, Thomas Gleixner
Subject: Re: [PATCH 17/25] hrtimer: Implementation of softirq hrtimer handling
Message-ID: <20170927164025.GI17526@worktop.programming.kicks-ass.net>
References: <20170831105725.809317030@linutronix.de>
 <20170831105826.921969670@linutronix.de>
In-Reply-To: <20170831105826.921969670@linutronix.de>

On Thu, Aug 31, 2017 at 12:23:42PM -0000, Anna-Maria Gleixner wrote:

> @@ -540,12 +539,23 @@ static ktime_t __hrtimer_get_next_event(
>  
>  	hrtimer_update_next_timer(cpu_base, NULL);
>  
> +	if (!cpu_base->softirq_activated) {
> +		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
> +		expires_next = __hrtimer_next_event_base(cpu_base, active,
> +							 expires_next);
> +		cpu_base->softirq_expires_next = expires_next;
> +	}
> +

So this bugged the living daylights out of me. That's a very asymmetric
thing to do. The normal rule is that __hrtimer_get_next_event() updates
cpu_base::next_timer and that hrtimer_reprogram() updates
cpu_base::expires_next, and here you make a giant mess of things.

The below is a fairly large diff on top of this patch. It is
_completely_ untested, but should hopefully restore sanity:

 - fixes the mixing of bool and bitfields in cpu_base

 - restores the get_next_event() / reprogram() symmetry by adding
   cpu_base::softirq_next_timer

 - adds a comment hopefully explaining the why of that

 - adds a for_each_active_base() helper; we had two copies of that
   loop, and for a brief moment I had another few, luckily those
   didn't survive :-)

 - uses irqsave/irqrestore for the cpu_base->lock fiddling around
   __run_hrtimer()

 - folds hrtimer_reprogram_softirq() into hrtimer_reprogram()

 - removes the superfluous local_bh_disable(); local_irq_disable()
   already implies much the same

Please consider...
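To spell out the for_each_active_base() pattern below: it's nothing but
"grab the lowest set bit, clear it, map it to a clock base". A little
userspace toy of the same idea, in case that helps (not the kernel code:
the names here are made up and __ffs() is approximated with
__builtin_ctz()):

#include <stdio.h>

static int __next_base_idx(unsigned int *active)
{
	int idx;

	if (!*active)
		return -1;

	idx = __builtin_ctz(*active);	/* lowest set bit, like __ffs() */
	*active &= ~(1U << idx);	/* clear it for the next round */

	return idx;
}

#define for_each_active_idx(idx, active) \
	while ((idx = __next_base_idx(&(active))) >= 0)

int main(void)
{
	unsigned int active = 0x15;	/* "bases" 0, 2 and 4 active */
	int idx;

	for_each_active_idx(idx, active)
		printf("processing clock base %d\n", idx);

	return 0;
}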
---
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -186,21 +186,26 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
-	bool				migration_enabled;
-	bool				nohz_active;
-	bool				softirq_activated;
-	unsigned int			hres_active	: 1,
-					in_hrtirq	: 1,
-					hang_detected	: 1;
+
+	unsigned int			migration_enabled	: 1;
+	unsigned int			nohz_active		: 1;
+	unsigned int			softirq_activated	: 1;
+	unsigned int			hres_active		: 1;
+	unsigned int			in_hrtirq		: 1;
+	unsigned int			hang_detected		: 1;
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned int			nr_retries;
 	unsigned int			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
+	struct hrtimer			*softirq_next_timer;
+
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -78,6 +78,7 @@
 #define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
 #define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
 #define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+#define HRTIMER_ACTIVE		(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
 /*
  * The timer bases:
@@ -493,33 +494,49 @@ static inline void debug_deactivate(stru
 	trace_hrtimer_cancel(timer);
 }
 
-static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
-					     struct hrtimer *timer)
+static inline bool hrtimer_is_softirq(struct hrtimer *timer)
 {
-	cpu_base->next_timer = timer;
+	return timer->base->index >= HRTIMER_BASE_MONOTONIC_SOFT;
+}
+
+static struct hrtimer_clock_base *
+__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
+{
+	struct hrtimer_clock_base *base = NULL;
+
+	if (*active) {
+		int idx = __ffs(*active);
+
+		*active &= ~(1U << idx);
+		base = &cpu_base->clock_base[idx];
+	}
+
+	return base;
 }
 
+#define for_each_active_base(base, cpu_base, active)	\
+	while ((base = __next_base((cpu_base), &(active))))
+
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 					 unsigned int active,
 					 ktime_t expires_next)
 {
+	struct hrtimer_clock_base *base;
 	ktime_t expires;
 
-	while (active) {
-		unsigned int id = __ffs(active);
-		struct hrtimer_clock_base *base;
+	for_each_active_base(base, cpu_base, active) {
 		struct timerqueue_node *next;
 		struct hrtimer *timer;
 
-		active &= ~(1U << id);
-		base = cpu_base->clock_base + id;
-
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
-			hrtimer_update_next_timer(cpu_base, timer);
+			if (hrtimer_is_softirq(timer))
+				cpu_base->softirq_next_timer = timer;
+			else
+				cpu_base->next_timer = timer;
 		}
 	}
 	/*
@@ -529,30 +546,47 @@ static ktime_t __hrtimer_next_event_base
 	 */
 	if (expires_next < 0)
 		expires_next = 0;
+
 	return expires_next;
 }
 
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+/*
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next,
+ * but does not set cpu_base::*expires_next; that is done by
+ * hrtimer_reprogram().
+ *
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases;
+ * those timers will get run whenever the softirq gets handled, and at the
+ * end of hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add
+ * these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
+ * @active_mask must be one of:
+ *  - HRTIMER_ACTIVE,
+ *  - HRTIMER_ACTIVE_SOFT, or
+ *  - HRTIMER_ACTIVE_HARD.
+ */
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
 {
 	unsigned int active;
+	struct hrtimer *next_timer = NULL;
 	ktime_t expires_next = KTIME_MAX;
 
-	hrtimer_update_next_timer(cpu_base, NULL);
-
-	if (!cpu_base->softirq_activated) {
+	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
-		expires_next = __hrtimer_next_event_base(cpu_base, active,
-							 expires_next);
-		cpu_base->softirq_expires_next = expires_next;
-	}
+		cpu_base->softirq_next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
 
-	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
-	expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+		next_timer = cpu_base->softirq_next_timer;
+	}
 
-	/*
-	 * cpu_base->expires_next is not updated here. It is set only
-	 * in hrtimer_reprogramming path!
-	 */
+	if (active_mask & HRTIMER_ACTIVE_HARD) {
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		cpu_base->next_timer = next_timer;
+		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+	}
 
 	return expires_next;
 }
@@ -621,7 +655,10 @@ hrtimer_force_reprogram(struct hrtimer_c
 	if (!cpu_base->hres_active)
 		return;
 
-	expires_next = __hrtimer_get_next_event(cpu_base);
+	/*
+	 * Find the current next expiration time.
+	 */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
 	if (skip_equal && expires_next == cpu_base->expires_next)
 		return;
@@ -727,6 +764,20 @@ static void hrtimer_reprogram(struct hrt
 
 	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
+	if (hrtimer_is_softirq(timer)) {
+		if (cpu_base->softirq_activated)
+			return;
+
+		if (!ktime_before(expires, cpu_base->softirq_expires_next))
+			return;
+
+		cpu_base->softirq_next_timer = timer;
+		cpu_base->softirq_expires_next = expires;
+
+		if (!ktime_before(expires, cpu_base->expires_next))
+			return;
+	}
+
 	/*
 	 * If the timer is not on the current cpu, we cannot reprogram
 	 * the other cpus clock event device.
@@ -755,7 +806,7 @@ static void hrtimer_reprogram(struct hrt
 		return;
 
 	/* Update the pointer to the next expiring timer */
-	hrtimer_update_next_timer(cpu_base, timer);
+	cpu_base->next_timer = timer;
 	cpu_base->expires_next = expires;
 
 	/*
@@ -979,47 +1030,24 @@ static inline ktime_t hrtimer_update_low
 	return tim;
 }
 
-static void hrtimer_reprogram_softirq(struct hrtimer *timer)
+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
 {
-	struct hrtimer_clock_base *base = timer->base;
-	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
 	ktime_t expires;
 
 	/*
-	 * The softirq timer is not rearmed, when the softirq was raised
-	 * and has not yet run to completion.
+	 * Find the next SOFT expiration.
 	 */
-	if (cpu_base->softirq_activated)
-		return;
-
-	expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-
-	if (!ktime_before(expires, cpu_base->softirq_expires_next))
-		return;
-
-	cpu_base->softirq_expires_next = expires;
-
-	if (!ktime_before(expires, cpu_base->expires_next))
-		return;
-	hrtimer_reprogram(timer);
-}
-
-static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base,
-					 bool reprogram)
-{
-	ktime_t expires;
-
-	expires = __hrtimer_get_next_event(cpu_base);
+	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
 
 	if (!reprogram || !ktime_before(expires, cpu_base->expires_next))
 		return;
+
 	/*
-	 * next_timer can be used here, because
-	 * hrtimer_get_next_event() updated the next
-	 * timer. expires_next is only set when reprogramming function
-	 * is called.
+	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event();
+	 * cpu_base->*expires_next is only set by hrtimer_reprogram().
 	 */
-	hrtimer_reprogram(cpu_base->next_timer);
+	hrtimer_reprogram(cpu_base->softirq_next_timer);
 }
 
 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
@@ -1060,12 +1088,9 @@ void hrtimer_start_range_ns(struct hrtim
 
 	base = lock_hrtimer_base(timer, &flags);
 
-	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) {
-		if (timer->base->index < HRTIMER_BASE_MONOTONIC_SOFT)
-			hrtimer_reprogram(timer);
-		else
-			hrtimer_reprogram_softirq(timer);
-	}
+	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
+		hrtimer_reprogram(timer);
+
 	unlock_hrtimer_base(timer, &flags);
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1163,7 +1188,7 @@ u64 hrtimer_get_next_event(void)
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	if (!__hrtimer_hres_active(cpu_base))
-		expires = __hrtimer_get_next_event(cpu_base);
+		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1263,7 +1288,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now,
-			  bool hardirq)
+			  unsigned long flags)
 {
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
@@ -1298,19 +1323,13 @@ static void __run_hrtimer(struct hrtimer
 	 * protected against migration to a different CPU even if the lock
 	 * is dropped.
 	 */
-	if (hardirq)
-		raw_spin_unlock(&cpu_base->lock);
-	else
-		raw_spin_unlock_irq(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	trace_hrtimer_expire_entry(timer, now);
 	restart = fn(timer);
 	trace_hrtimer_expire_exit(timer);
 
-	if (hardirq)
-		raw_spin_lock(&cpu_base->lock);
-	else
-		raw_spin_lock_irq(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	/*
 	 * Note: We clear the running state after enqueue_hrtimer and
@@ -1339,19 +1358,15 @@ static void __run_hrtimer(struct hrtimer
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
-				 unsigned int active_mask)
+				 unsigned int active_mask, unsigned long flags)
 {
 	unsigned int active = cpu_base->active_bases & active_mask;
+	struct hrtimer_clock_base *base;
 
-	while (active) {
-		unsigned int id = __ffs(active);
-		struct hrtimer_clock_base *base;
+	for_each_active_base(base, cpu_base, active) {
 		struct timerqueue_node *node;
 		ktime_t basenow;
 
-		active &= ~(1U << id);
-		base = cpu_base->clock_base + id;
-
 		basenow = ktime_add(now, base->offset);
 
 		while ((node = timerqueue_getnext(&base->active))) {
@@ -1374,8 +1389,7 @@ static void __hrtimer_run_queues(struct
 			if (basenow < hrtimer_get_softexpires_tv64(timer))
 				break;
 
-			__run_hrtimer(cpu_base, base, timer, &basenow,
-				      active_mask == HRTIMER_ACTIVE_HARD);
+			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
 		}
 	}
 }
@@ -1383,17 +1397,18 @@ static void __hrtimer_run_queues(struct
 static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
 	ktime_t now;
 
-	raw_spin_lock_irq(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT, flags);
 
 	cpu_base->softirq_activated = 0;
 	hrtimer_update_softirq_timer(cpu_base, true);
 
-	raw_spin_unlock_irq(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1406,13 +1421,14 @@ void hrtimer_interrupt(struct clock_even
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
+	unsigned long flags;
 	int retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event = KTIME_MAX;
 
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
 	cpu_base->in_hrtirq = 1;
@@ -1431,17 +1447,17 @@ void hrtimer_interrupt(struct clock_even
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
 
-	/* Reevaluate the hard interrupt clock bases for the next expiry */
-	expires_next = __hrtimer_get_next_event(cpu_base);
+	/* Reevaluate the clock bases for the next expiry */
+	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 	/*
 	 * Store the new expiry value so the migration code can verify
 	 * against it.
 	 */
 	cpu_base->expires_next = expires_next;
 	cpu_base->in_hrtirq = 0;
-	raw_spin_unlock(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
 	/* Reprogramming necessary ? */
 	if (!tick_program_event(expires_next, 0)) {
@@ -1462,7 +1478,7 @@ void hrtimer_interrupt(struct clock_even
 	 * Acquire base lock for updating the offsets and retrieving
 	 * the current time.
 	 */
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
 	cpu_base->nr_retries++;
 	if (++retries < 3)
@@ -1475,7 +1491,8 @@ void hrtimer_interrupt(struct clock_even
 	 */
 	cpu_base->nr_hangs++;
 	cpu_base->hang_detected = 1;
-	raw_spin_unlock(&cpu_base->lock);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
 	delta = ktime_sub(now, entry_time);
 	if ((unsigned int)delta > cpu_base->max_hang_time)
 		cpu_base->max_hang_time = (unsigned int) delta;
@@ -1517,6 +1534,7 @@ static inline void __hrtimer_peek_ahead_
 void hrtimer_run_queues(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	unsigned long flags;
 	ktime_t now;
 
 	if (__hrtimer_hres_active(cpu_base))
@@ -1534,7 +1552,7 @@ void hrtimer_run_queues(void)
 		return;
 	}
 
-	raw_spin_lock(&cpu_base->lock);
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 	now = hrtimer_update_base(cpu_base);
 
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
@@ -1543,8 +1561,8 @@ void hrtimer_run_queues(void)
 		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
-	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
-	raw_spin_unlock(&cpu_base->lock);
+	__hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 /*
@@ -1768,7 +1786,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	BUG_ON(cpu_online(scpu));
 	tick_cancel_sched_timer(scpu);
 
-	local_bh_disable();
 	local_irq_disable();
 	old_base = &per_cpu(hrtimer_bases, scpu);
 	new_base = this_cpu_ptr(&hrtimer_bases);
@@ -1796,7 +1813,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
 	local_irq_enable();
-	local_bh_enable();
 
 	return 0;
 }
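FWIW, on the first bullet above: a bool cannot share a storage unit with
an adjacent unsigned int bitfield, so the old mixed layout wastes a
word. Userspace toy illustrating that (toy structs, not the real
hrtimer_cpu_base; exact sizes are ABI dependent):

#include <stdbool.h>
#include <stdio.h>

struct mixed {			/* roughly the old layout */
	bool a, b, c;
	unsigned int d : 1,
		     e : 1,
		     f : 1;
};

struct only_bits {		/* roughly the new layout */
	unsigned int a : 1;
	unsigned int b : 1;
	unsigned int c : 1;
	unsigned int d : 1;
	unsigned int e : 1;
	unsigned int f : 1;
};

int main(void)
{
	/* typically prints 8 vs 4 on x86_64 */
	printf("mixed: %zu, only bitfields: %zu\n",
	       sizeof(struct mixed), sizeof(struct only_bits));
	return 0;
}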