linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice
@ 2013-04-29  6:26 kosaki.motohiro
  2013-04-29  6:26 ` [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization kosaki.motohiro
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: kosaki.motohiro @ 2013-04-29  6:26 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra

From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Currently glibc rt/tst-cpuclock2 test(*) sporadically fail. Because
scheduler delta can be accounted twice from thread_group_cputimer()
and account_group_exec_runtime().

Finally, clock_nanosleep() wakes up before an argument. And that is
posix violation. This issue was introduced by commit d670ec1317
(posix-cpu-timers: Cure SMP wobbles).

(*) http://sourceware.org/git/?p=glibc.git;a=blob;f=rt/tst-cpuclock2.c;h=6752721717f959e89c0d692b3f1ee082d507eec2;hb=HEAD

Cc: Olivier Langlois <olivier@trillion01.com>
CC: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: David Miller <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 fs/binfmt_elf.c           |    2 +-
 fs/binfmt_elf_fdpic.c     |    2 +-
 include/linux/sched.h     |    4 ++--
 kernel/posix-cpu-timers.c |   15 ++++++++++-----
 kernel/sched/core.c       |    6 ++++--
 kernel/sched/cputime.c    |    8 ++++----
 6 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 86af964..fea51e7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1322,7 +1322,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		 * This is the record for the group leader.  It shows the
 		 * group-wide total, not its individual thread total.
 		 */
-		thread_group_cputime(p, &cputime);
+		thread_group_cputime(p, true, &cputime);
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9c13e02..ab5b508 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1371,7 +1371,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		 * This is the record for the group leader.  It shows the
 		 * group-wide total, not its individual thread total.
 		 */
-		thread_group_cputime(p, &cputime);
+		thread_group_cputime(p, true, &cputime);
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e692a02..7863d4b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2002,7 +2002,7 @@ static inline void disable_sched_clock_irqtime(void) {}
 #endif
 
 extern unsigned long long
-task_sched_runtime(struct task_struct *task);
+task_sched_runtime(struct task_struct *task, bool add_delta);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2625,7 +2625,7 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_cputime(struct task_struct *tsk, bool add_delta, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8fd709c..e56be4c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -220,7 +220,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p);
+		cpu->sched = task_sched_runtime(p, true);
 		break;
 	}
 	return 0;
@@ -250,8 +250,13 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 		 * values through the TIMER_ABSTIME flag, therefore we have
 		 * to synchronize the timer to the clock every time we start
 		 * it.
+		 *
+		 * Do not add the current delta, because
+		 * account_group_exec_runtime() will also this delta and we
+		 * wouldn't want to double account time and get ahead of
+		 * ourselves.
 		 */
-		thread_group_cputime(tsk, &sum);
+		thread_group_cputime(tsk, false, &sum);
 		raw_spin_lock_irqsave(&cputimer->lock, flags);
 		cputimer->running = 1;
 		update_gt_cputime(&cputimer->cputime, &sum);
@@ -275,15 +280,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		thread_group_cputime(p, &cputime);
+		thread_group_cputime(p, true, &cputime);
 		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 	case CPUCLOCK_VIRT:
-		thread_group_cputime(p, &cputime);
+		thread_group_cputime(p, true, &cputime);
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		thread_group_cputime(p, &cputime);
+		thread_group_cputime(p, true, &cputime);
 		cpu->sched = cputime.sum_exec_runtime;
 		break;
 	}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67d0465..ad3339f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2664,14 +2664,16 @@ unsigned long long task_delta_exec(struct task_struct *p)
  * In case the task is currently running, return the runtime plus current's
  * pending runtime that have not been accounted yet.
  */
-unsigned long long task_sched_runtime(struct task_struct *p)
+unsigned long long task_sched_runtime(struct task_struct *p, bool add_delta)
 {
 	unsigned long flags;
 	struct rq *rq;
 	u64 ns = 0;
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+	ns = p->se.sum_exec_runtime;
+	if (add_delta)
+		ns += do_task_delta_exec(p, rq);
 	task_rq_unlock(rq, p, &flags);
 
 	return ns;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e93cca9..69d3f6c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -293,7 +293,7 @@ static __always_inline bool steal_account_process_tick(void)
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
  */
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputime(struct task_struct *tsk, bool add_delta, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
 	cputime_t utime, stime;
@@ -313,7 +313,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 		task_cputime(t, &utime, &stime);
 		times->utime += utime;
 		times->stime += stime;
-		times->sum_exec_runtime += task_sched_runtime(t);
+		times->sum_exec_runtime += task_sched_runtime(t, add_delta);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
@@ -459,7 +459,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(p, &cputime);
+	thread_group_cputime(p, true, &cputime);
 
 	*ut = cputime.utime;
 	*st = cputime.stime;
@@ -594,7 +594,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(p, &cputime);
+	thread_group_cputime(p, true, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization
  2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
@ 2013-04-29  6:26 ` kosaki.motohiro
  2013-04-29 10:36   ` Peter Zijlstra
  2013-04-29 19:17 ` [BUGFIX PATCH 3/2] posix-cpu-timers: check_thread_timers() uses task_sched_runtime() KOSAKI Motohiro
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: kosaki.motohiro @ 2013-04-29  6:26 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra

From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Currently glibc's rt/tst-cputimer1 testcase is spradically fail because
a timer created by timer_create() may faire earlier than an argument.

There are two faults. 1) cpu_timer_sample_group() adds task_delta_exec(current).
But it is definity silly idea especially when multi thread. cputimer should
be initialized by committed exec runtime. i.e. it should not be added
scheduler delta. 2) expire time should be current time + timeout. In the other
words, expire calculation should take care scheduler delta.

Cc: Olivier Langlois <olivier@trillion01.com>
CC: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: David Miller <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 include/linux/kernel_stat.h |    5 -----
 kernel/posix-cpu-timers.c   |   33 +++++++++++++++++++++++----------
 kernel/sched/core.c         |   13 -------------
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index ed5f6ed..f5d4fdf 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -117,11 +117,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
-/*
- * Lock/unlock the current runqueue - to extract task statistics:
- */
-extern unsigned long long task_delta_exec(struct task_struct *);
-
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
 extern void account_steal_time(cputime_t);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index e56be4c..dc61bc3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -203,12 +203,10 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 	return error;
 }
 
-
-/*
- * Sample a per-thread clock for the given task.
- */
-static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
-			    union cpu_time_count *cpu)
+static int do_cpu_clock_sample(const clockid_t which_clock,
+			       struct task_struct *p,
+			       bool add_delta,
+			       union cpu_time_count *cpu)
 {
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
@@ -220,12 +218,21 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p, true);
+		cpu->sched = task_sched_runtime(p, add_delta);
 		break;
 	}
 	return 0;
 }
 
+/*
+ * Sample a per-thread clock for the given task.
+ */
+static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
+			    union cpu_time_count *cpu)
+{
+	return do_cpu_clock_sample(which_clock, p, true, cpu);
+}
+
 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 {
 	if (b->utime > a->utime)
@@ -635,7 +642,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = cputime.sum_exec_runtime;
 		break;
 	}
 	return 0;
@@ -700,7 +707,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 * check if it's already passed.  In short, we need a sample.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		cpu_clock_sample(timer->it_clock, p, &val);
+		do_cpu_clock_sample(timer->it_clock, p, false, &val);
 	} else {
 		cpu_timer_sample_group(timer->it_clock, p, &val);
 	}
@@ -749,7 +756,13 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	}
 
 	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
-		cpu_time_add(timer->it_clock, &new_expires, val);
+		union cpu_time_count now;
+
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			cpu_clock_sample(timer->it_clock, p, &now);
+		else
+			cpu_clock_sample_group(timer->it_clock, p, &now);
+		cpu_time_add(timer->it_clock, &new_expires, now);
 	}
 
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ad3339f..b817e6d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2646,19 +2646,6 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
 	return ns;
 }
 
-unsigned long long task_delta_exec(struct task_struct *p)
-{
-	unsigned long flags;
-	struct rq *rq;
-	u64 ns = 0;
-
-	rq = task_rq_lock(p, &flags);
-	ns = do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, p, &flags);
-
-	return ns;
-}
-
 /*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization
  2013-04-29  6:26 ` [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization kosaki.motohiro
@ 2013-04-29 10:36   ` Peter Zijlstra
  2013-04-29 17:53     ` KOSAKI Motohiro
  0 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2013-04-29 10:36 UTC (permalink / raw)
  To: kosaki.motohiro
  Cc: linux-kernel, KOSAKI Motohiro, Olivier Langlois,
	Martin Schwidefsky, Steven Rostedt, David Miller,
	Thomas Gleixner, Frederic Weisbecker, Ingo Molnar

On Mon, Apr 29, 2013 at 02:26:02AM -0400, kosaki.motohiro@gmail.com wrote:
> From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> Currently glibc's rt/tst-cputimer1 testcase is spradically fail because
> a timer created by timer_create() may faire earlier than an argument.
> 
> There are two faults. 1) cpu_timer_sample_group() adds task_delta_exec(current).
> But it is definity silly idea especially when multi thread. cputimer should
> be initialized by committed exec runtime. i.e. it should not be added
> scheduler delta. 2) expire time should be current time + timeout. In the other
> words, expire calculation should take care scheduler delta.

I'm sorry, that completely fails to parse.

> -/*
> - * Lock/unlock the current runqueue - to extract task statistics:
> - */
> -extern unsigned long long task_delta_exec(struct task_struct *);

Yay.. this thing dying is good -- it did seem strange to compute the current
delta but not also read sum_exec_runtime under the same lock.

> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
> index e56be4c..dc61bc3 100644
> --- a/kernel/posix-cpu-timers.c
> +++ b/kernel/posix-cpu-timers.c
> @@ -203,12 +203,10 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
>  	return error;
>  }
>  
> -
> -/*
> - * Sample a per-thread clock for the given task.
> - */
> -static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
> -			    union cpu_time_count *cpu)
> +static int do_cpu_clock_sample(const clockid_t which_clock,
> +			       struct task_struct *p,
> +			       bool add_delta,
> +			       union cpu_time_count *cpu)

Would not thread_cputime() (to mirror thread_group_cputime()) be a better name?

Also, I would think both these functions would be a good place to insert a
comment explaining the difference between timer and clock.

> +static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
> +			    union cpu_time_count *cpu)
> +{
> +	return do_cpu_clock_sample(which_clock, p, true, cpu);
> +}

> @@ -700,7 +707,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
>  	 * check if it's already passed.  In short, we need a sample.
>  	 */
>  	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
> -		cpu_clock_sample(timer->it_clock, p, &val);
> +		do_cpu_clock_sample(timer->it_clock, p, false, &val);
>  	} else {
>  		cpu_timer_sample_group(timer->it_clock, p, &val);
>  	}

This would suggest:

static inline int cpu_timer_sample(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu)
{
	return do_cpu_clock_sample(which_clock, p, false, cpu);
}

That would preserve the: cpu_{timer,clock}_sample{,_group}() form.

> @@ -749,7 +756,13 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
>  	}
>  
>  	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
> -		cpu_time_add(timer->it_clock, &new_expires, val);
> +		union cpu_time_count now;
> +
> +		if (CPUCLOCK_PERTHREAD(timer->it_clock))
> +			cpu_clock_sample(timer->it_clock, p, &now);
> +		else
> +			cpu_clock_sample_group(timer->it_clock, p, &now);

This triggered a pattern match against earlier in this function; but they're
different now; timer vs clock. So nothing to merge...


So I don't mind the code changes, although its still not entirely clear to me
what exact problem is fixed how; and thus the Changelog needs TLC.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization
  2013-04-29 10:36   ` Peter Zijlstra
@ 2013-04-29 17:53     ` KOSAKI Motohiro
  2013-04-29 18:53       ` KOSAKI Motohiro
  0 siblings, 1 reply; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 17:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kosaki.motohiro, linux-kernel, KOSAKI Motohiro, Olivier Langlois,
	Martin Schwidefsky, Steven Rostedt, David Miller,
	Thomas Gleixner, Frederic Weisbecker, Ingo Molnar

(4/29/13 6:36 AM), Peter Zijlstra wrote:
> On Mon, Apr 29, 2013 at 02:26:02AM -0400, kosaki.motohiro@gmail.com wrote:
>> From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
>>
>> Currently glibc's rt/tst-cputimer1 testcase is spradically fail because
>> a timer created by timer_create() may faire earlier than an argument.
>>
>> There are two faults. 1) cpu_timer_sample_group() adds task_delta_exec(current).
>> But it is definity silly idea especially when multi thread. cputimer should
>> be initialized by committed exec runtime. i.e. it should not be added
>> scheduler delta. 2) expire time should be current time + timeout. In the other
>> words, expire calculation should take care scheduler delta.
> 
> I'm sorry, that completely fails to parse.
> 
>> -/*
>> - * Lock/unlock the current runqueue - to extract task statistics:
>> - */
>> -extern unsigned long long task_delta_exec(struct task_struct *);
> 
> Yay.. this thing dying is good -- it did seem strange to compute the current
> delta but not also read sum_exec_runtime under the same lock.
> 
>> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
>> index e56be4c..dc61bc3 100644
>> --- a/kernel/posix-cpu-timers.c
>> +++ b/kernel/posix-cpu-timers.c
>> @@ -203,12 +203,10 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
>>  	return error;
>>  }
>>  
>> -
>> -/*
>> - * Sample a per-thread clock for the given task.
>> - */
>> -static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
>> -			    union cpu_time_count *cpu)
>> +static int do_cpu_clock_sample(const clockid_t which_clock,
>> +			       struct task_struct *p,
>> +			       bool add_delta,
>> +			       union cpu_time_count *cpu)
> 
> Would not thread_cputime() (to mirror thread_group_cputime()) be a better name?

agreed.


> Also, I would think both these functions would be a good place to insert a
> comment explaining the difference between timer and clock.

agreed.


> 
>> +static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
>> +			    union cpu_time_count *cpu)
>> +{
>> +	return do_cpu_clock_sample(which_clock, p, true, cpu);
>> +}
> 
>> @@ -700,7 +707,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
>>  	 * check if it's already passed.  In short, we need a sample.
>>  	 */
>>  	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
>> -		cpu_clock_sample(timer->it_clock, p, &val);
>> +		do_cpu_clock_sample(timer->it_clock, p, false, &val);
>>  	} else {
>>  		cpu_timer_sample_group(timer->it_clock, p, &val);
>>  	}
> 
> This would suggest:
> 
> static inline int cpu_timer_sample(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu)
> {
> 	return do_cpu_clock_sample(which_clock, p, false, cpu);
> }
> 
> That would preserve the: cpu_{timer,clock}_sample{,_group}() form.

Yeah, agreed.
And also, all timer function should use cpu_timer_sample() instead of cpu_clock_sample().
check_thread_timers() uses p->se.sum_exec_runtime without delta. This is consitency with
per-process timer. Thus, other functions (e.g. posix_cpu_timers_get) should also use the
same.

> 
>> @@ -749,7 +756,13 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
>>  	}
>>  
>>  	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
>> -		cpu_time_add(timer->it_clock, &new_expires, val);
>> +		union cpu_time_count now;
>> +
>> +		if (CPUCLOCK_PERTHREAD(timer->it_clock))
>> +			cpu_clock_sample(timer->it_clock, p, &now);
>> +		else
>> +			cpu_clock_sample_group(timer->it_clock, p, &now);
> 
> This triggered a pattern match against earlier in this function; but they're
> different now; timer vs clock. So nothing to merge...

Not different, I think.
Relative timeout need to calculate "now + timeout" by definition.

But which time is "now"? 

Example, thread1 has 10ms sum_exec_runtime and 4ms delta and call timer_settime(4ms).
Old code calculate an expire is 10+4=14. New one calculate  10+4+4=18.

Which expire is correct? When using old one, timer will fire just after syscall. This
is posix violation. 

In the other words,

	sighandler(){
		t1 = clock_gettime()
	}

	t0 = clock_gettime()
	timer_settime(timeout);
	 ... wait to fire
	
	assert (t1 - t0 >= timeout)

This pseudo code must be true. it is snippest what glibc rt/tst-cputimer1 test and failed.




> So I don't mind the code changes, although its still not entirely clear to me
> what exact problem is fixed how; and thus the Changelog needs TLC.
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization
  2013-04-29 17:53     ` KOSAKI Motohiro
@ 2013-04-29 18:53       ` KOSAKI Motohiro
  0 siblings, 0 replies; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 18:53 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Peter Zijlstra, linux-kernel, KOSAKI Motohiro, Olivier Langlois,
	Martin Schwidefsky, Steven Rostedt, David Miller,
	Thomas Gleixner, Frederic Weisbecker, Ingo Molnar

>>> @@ -749,7 +756,13 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
>>>  	}
>>>  
>>>  	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
>>> -		cpu_time_add(timer->it_clock, &new_expires, val);
>>> +		union cpu_time_count now;
>>> +
>>> +		if (CPUCLOCK_PERTHREAD(timer->it_clock))
>>> +			cpu_clock_sample(timer->it_clock, p, &now);
>>> +		else
>>> +			cpu_clock_sample_group(timer->it_clock, p, &now);
>>
>> This triggered a pattern match against earlier in this function; but they're
>> different now; timer vs clock. So nothing to merge...
> 
> Not different, I think.
> Relative timeout need to calculate "now + timeout" by definition.
> 
> But which time is "now"? 
> 
> Example, thread1 has 10ms sum_exec_runtime and 4ms delta and call timer_settime(4ms).
> Old code calculate an expire is 10+4=14. New one calculate  10+4+4=18.
> 
> Which expire is correct? When using old one, timer will fire just after syscall. This
> is posix violation. 
> 
> In the other words,
> 
> 	sighandler(){
> 		t1 = clock_gettime()
> 	}
> 
> 	t0 = clock_gettime()
> 	timer_settime(timeout);
> 	 ... wait to fire
> 	
> 	assert (t1 - t0 >= timeout)
> 
> This pseudo code must be true. it is snippest what glibc rt/tst-cputimer1 test and failed.

In the other hands, following two calculations need to timer time (aka time without delta).

1) Initialization signal->cputimer for avoiding double delta count.
2) calculating old tiemr because timer firing logic (run_posix_cpu_timers) don't care delta_exec. 




^ permalink raw reply	[flat|nested] 9+ messages in thread

* [BUGFIX PATCH 3/2] posix-cpu-timers: check_thread_timers() uses task_sched_runtime()
  2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
  2013-04-29  6:26 ` [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization kosaki.motohiro
@ 2013-04-29 19:17 ` KOSAKI Motohiro
  2013-04-29 19:18 ` [PATCH 4/2] sched: task_sched_runtime introduce micro optimization KOSAKI Motohiro
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 19:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	kosaki.motohiro

A type of tsk->se.sum_exec_runtime is u64. Thus reading it is racy when
running 32bit. We should use task_sched_runtime().

Cc: Olivier Langlois <olivier@trillion01.com>
CC: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 kernel/posix-cpu-timers.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index dc61bc3..651b6c7 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -944,7 +944,8 @@ static void check_thread_timers(struct task_struct *tsk,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
-		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
+		unsigned long long runtime = task_sched_runtime(tsk, false);
+		if (!--maxfire || runtime < t->expires.sched) {
 			tsk->cputime_expires.sched_exp = t->expires.sched;
 			break;
 		}
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/2] sched: task_sched_runtime introduce micro optimization
  2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
  2013-04-29  6:26 ` [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization kosaki.motohiro
  2013-04-29 19:17 ` [BUGFIX PATCH 3/2] posix-cpu-timers: check_thread_timers() uses task_sched_runtime() KOSAKI Motohiro
@ 2013-04-29 19:18 ` KOSAKI Motohiro
  2013-04-29 19:19 ` [PATCH 5/2] posix-cpu-timers: cleanup cpu_{clock,timer}_sample{,_group} KOSAKI Motohiro
  2013-04-29 19:20 ` [PATCH 6/2] posix-cpu-timers: timer functions must use timer time instead of clock time KOSAKI Motohiro
  4 siblings, 0 replies; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 19:18 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	kosaki.motohiro

rq lock in task_sched_runtime() is necessary for two reasons. 1)
accessing se.sum_exec_runtime is inatomic on 32bit and 2)
do_task_delta_exec() require it.

And then, 64bit can avoid holds rq lock when add_delta is false.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 kernel/sched/core.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b817e6d..24ba1c6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2657,6 +2657,12 @@ unsigned long long task_sched_runtime(struct task_struct *p, bool add_delta)
 	struct rq *rq;
 	u64 ns = 0;
 
+	/* Micro optimization. */
+#ifdef CONFIG_64BIT
+	if (!add_delta)
+		return p->se.sum_exec_runtime;
+#endif
+
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime;
 	if (add_delta)
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/2] posix-cpu-timers: cleanup cpu_{clock,timer}_sample{,_group}
  2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
                   ` (2 preceding siblings ...)
  2013-04-29 19:18 ` [PATCH 4/2] sched: task_sched_runtime introduce micro optimization KOSAKI Motohiro
@ 2013-04-29 19:19 ` KOSAKI Motohiro
  2013-04-29 19:20 ` [PATCH 6/2] posix-cpu-timers: timer functions must use timer time instead of clock time KOSAKI Motohiro
  4 siblings, 0 replies; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 19:19 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	kosaki.motohiro

Now we have similar four timer related functions, cpu_clock_sample(),
cpu_clock_sample_group(), cpu_timer_sample() and cpu_timer_sample_group().

For readability, making do_cpu_clock_timer_sample() and thread_cputime()
helper functions and all *_sample functions use them.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 include/linux/sched.h     |    1 +
 kernel/posix-cpu-timers.c |  123 +++++++++++++++++++++------------------------
 kernel/sched/cputime.c    |   11 ++++
 3 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7863d4b..73ac8fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2622,6 +2622,7 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+void thread_cputime(struct task_struct *tsk, bool add_delta, struct task_cputime *times);
 /*
  * Thread group CPU time accounting.
  */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 651b6c7..9088023 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -203,36 +203,6 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
 	return error;
 }
 
-static int do_cpu_clock_sample(const clockid_t which_clock,
-			       struct task_struct *p,
-			       bool add_delta,
-			       union cpu_time_count *cpu)
-{
-	switch (CPUCLOCK_WHICH(which_clock)) {
-	default:
-		return -EINVAL;
-	case CPUCLOCK_PROF:
-		cpu->cpu = prof_ticks(p);
-		break;
-	case CPUCLOCK_VIRT:
-		cpu->cpu = virt_ticks(p);
-		break;
-	case CPUCLOCK_SCHED:
-		cpu->sched = task_sched_runtime(p, add_delta);
-		break;
-	}
-	return 0;
-}
-
-/*
- * Sample a per-thread clock for the given task.
- */
-static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
-			    union cpu_time_count *cpu)
-{
-	return do_cpu_clock_sample(which_clock, p, true, cpu);
-}
-
 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 {
 	if (b->utime > a->utime)
@@ -274,34 +244,84 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 }
 
 /*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Sample time for the given task.
+ * @which_clock:	Clock id.
+ * @p:			Target task. Must be thread group leader when
+ *			thread_group is true.
+ * @thread_group:	True if want to get process time.
+ * @add_delta:		True if want to get clock time,
+ *			otherwise, get timer time.
  */
-static int cpu_clock_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
+static int do_cpu_clock_timer_sample(const clockid_t which_clock,
+				     struct task_struct *p,
+				     bool thread_group,
+				     bool clock_time,
+				     union cpu_time_count *cpu)
 {
 	struct task_cputime cputime;
 
+	if (thread_group)
+		thread_group_cputime(p, clock_time, &cputime);
+	else
+		thread_cputime(p, clock_time, &cputime);
+
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		thread_group_cputime(p, true, &cputime);
 		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 	case CPUCLOCK_VIRT:
-		thread_group_cputime(p, true, &cputime);
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		thread_group_cputime(p, true, &cputime);
 		cpu->sched = cputime.sum_exec_runtime;
 		break;
 	}
 	return 0;
 }
 
+/*
+ * Get a per-thread clock time for the given task.
+ */
+static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
+			    union cpu_time_count *cpu)
+{
+	return do_cpu_clock_timer_sample(which_clock, p, false, true, cpu);
+}
+
+/*
+ * Get a per-process clock time for the given task.
+ * Must be called with tasklist_lock held for reading if p is not thread
+ * group leader.
+ */
+static int cpu_clock_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
+{
+	return do_cpu_clock_timer_sample(which_clock, p, true, true, cpu);
+}
+
+/*
+ * Sample a per-thread timer time for the given task.
+ */
+static int cpu_timer_sample(const clockid_t which_clock, struct task_struct *p,
+			    union cpu_time_count *cpu)
+{
+	return do_cpu_clock_timer_sample(which_clock, p, false, false, cpu);
+}
+
+/*
+ * Sample a process timer time for the given task.
+ * Must be called with tasklist_lock held for reading if p is not thread group
+ * leader.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
+{
+	return do_cpu_clock_timer_sample(which_clock, p, true, false, cpu);
+}
 
 static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
@@ -622,33 +642,6 @@ static void cpu_timer_fire(struct k_itimer *timer)
 }
 
 /*
- * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_timer_sample_group(const clockid_t which_clock,
-				  struct task_struct *p,
-				  union cpu_time_count *cpu)
-{
-	struct task_cputime cputime;
-
-	thread_group_cputimer(p, &cputime);
-	switch (CPUCLOCK_WHICH(which_clock)) {
-	default:
-		return -EINVAL;
-	case CPUCLOCK_PROF:
-		cpu->cpu = cputime.utime + cputime.stime;
-		break;
-	case CPUCLOCK_VIRT:
-		cpu->cpu = cputime.utime;
-		break;
-	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime;
-		break;
-	}
-	return 0;
-}
-
-/*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
  * If we return TIMER_RETRY, it's necessary to release the timer's lock
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 69d3f6c..29ef7d7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -289,6 +289,17 @@ static __always_inline bool steal_account_process_tick(void)
 	return false;
 }
 
+void thread_cputime(struct task_struct *tsk, bool add_delta, struct task_cputime *times)
+{
+	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	times->utime = utime;
+	times->stime = stime;
+	times->sum_exec_runtime = task_sched_runtime(tsk, add_delta);
+}
+
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/2] posix-cpu-timers: timer functions must use timer time instead of clock time
  2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
                   ` (3 preceding siblings ...)
  2013-04-29 19:19 ` [PATCH 5/2] posix-cpu-timers: cleanup cpu_{clock,timer}_sample{,_group} KOSAKI Motohiro
@ 2013-04-29 19:20 ` KOSAKI Motohiro
  4 siblings, 0 replies; 9+ messages in thread
From: KOSAKI Motohiro @ 2013-04-29 19:20 UTC (permalink / raw)
  To: linux-kernel
  Cc: KOSAKI Motohiro, Olivier Langlois, Martin Schwidefsky,
	Steven Rostedt, David Miller, Thomas Gleixner,
	Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	kosaki.motohiro

For process timer, we use cpu_clock_sample_group() and cpu_timer_sample_group
correctly. However for thread timer, we always use cpu_clock_sample().
That's wrong becuase cpu_clock_sample() account uncommited delta_exec too.

This is not big matter because following workaround code in
posix_cpu_timer_get() hide the timer inversion issue. However it makes wrong
rq lock held and would be better to fix.

  if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
    ...
  } else {
    /*
     * The timer should have expired already, but the firing
     * hasn't taken place yet.  Say it's just about to expire.
     */
     itp->it_value.tv_nsec = 1;
     itp->it_value.tv_sec = 0;

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 kernel/posix-cpu-timers.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9088023..472394b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -700,7 +700,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 	 * check if it's already passed.  In short, we need a sample.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		do_cpu_clock_sample(timer->it_clock, p, false, &val);
+		cpu_timer_sample(timer->it_clock, p, &val);
 	} else {
 		cpu_timer_sample_group(timer->it_clock, p, &val);
 	}
@@ -840,7 +840,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
 	 * Sample the clock to take the difference with the expiry time.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		cpu_clock_sample(timer->it_clock, p, &now);
+		cpu_timer_sample(timer->it_clock, p, &now);
 		clear_dead = p->exit_state;
 	} else {
 		read_lock(&tasklist_lock);
@@ -1165,7 +1165,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	 * Fetch the current sample and update the timer's expiry time.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		cpu_clock_sample(timer->it_clock, p, &now);
+		cpu_timer_sample(timer->it_clock, p, &now);
 		bump_cpu_timer(timer, now);
 		if (unlikely(p->exit_state)) {
 			clear_dead_task(timer, now);
-- 
1.7.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-04-29 19:20 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-04-29  6:26 [PATCH 1/2] posix-cpu-timers: fix acounting delta_exec twice kosaki.motohiro
2013-04-29  6:26 ` [PATCH 2/2] posix-cpu-timers: fix wrong timer initialization kosaki.motohiro
2013-04-29 10:36   ` Peter Zijlstra
2013-04-29 17:53     ` KOSAKI Motohiro
2013-04-29 18:53       ` KOSAKI Motohiro
2013-04-29 19:17 ` [BUGFIX PATCH 3/2] posix-cpu-timers: check_thread_timers() uses task_sched_runtime() KOSAKI Motohiro
2013-04-29 19:18 ` [PATCH 4/2] sched: task_sched_runtime introduce micro optimization KOSAKI Motohiro
2013-04-29 19:19 ` [PATCH 5/2] posix-cpu-timers: cleanup cpu_{clock,timer}_sample{,_group} KOSAKI Motohiro
2013-04-29 19:20 ` [PATCH 6/2] posix-cpu-timers: timer functions must use timer time instead of clock time KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).