* [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime()
@ 2009-06-12 10:39 Stanislaw Gruszka
2009-06-12 11:09 ` Peter Zijlstra
0 siblings, 1 reply; 5+ messages in thread
From: Stanislaw Gruszka @ 2009-06-12 10:39 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, Oleg Nesterov, Peter Zijlstra, Ingo Molnar
When we call thread_group_cputime() we do not always need all 3 cputime values.
Use an additional mask to specify which fields of struct task_cputime the caller
is interested in. Similarly, cputimer is changed so that cputimer->running is a
mask of the currently accounted cpu times.
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
fs/binfmt_elf.c | 3 +-
fs/binfmt_elf_fdpic.c | 2 +-
fs/proc/array.c | 4 ++-
include/linux/sched.h | 12 +++++++++-
kernel/exit.c | 3 +-
kernel/fork.c | 2 +-
kernel/itimer.c | 6 +++-
kernel/posix-cpu-timers.c | 48 +++++++++++++++++++++++++++++---------------
kernel/sched.c | 2 +-
kernel/sched_stats.h | 6 ++--
kernel/sys.c | 7 ++++-
11 files changed, 63 insertions(+), 32 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 40381df..d5f4efd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1351,7 +1351,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime,
+ TG_CPUCLOCK_UTIME | TG_CPUCLOCK_STIME);
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdb66fa..004a141 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1398,7 +1398,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime, TG_CPUCLOCK_UTIME | TG_CPUCLOCK_STIME);
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 725a650..c517495 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -416,7 +416,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_cputime(task, &cputime);
+ thread_group_cputime(task, &cputime,
+ TG_CPUCLOCK_UTIME |
+ TG_CPUCLOCK_STIME);
utime = cputime.utime;
stime = cputime.stime;
gtime = cputime_add(gtime, sig->gtime);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d139966..872974a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2267,8 +2267,16 @@ static inline int spin_needbreak(spinlock_t *lock)
/*
* Thread group CPU time accounting.
*/
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
+
+#define TG_CPUCLOCK_UTIME 0x1
+#define TG_CPUCLOCK_STIME 0x2
+#define TG_CPUCLOCK_SCHED 0x4
+#define TG_CPUCLOCK_ALL 0x7
+
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times,
+ int mask);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times,
+ int mask);
static inline void thread_group_cputime_init(struct signal_struct *sig)
{
diff --git a/kernel/exit.c b/kernel/exit.c
index cab535c..155c4cd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1205,7 +1205,8 @@ static int wait_task_zombie(struct task_struct *p, int options,
* group, which consolidates times for all threads in the
* group including the group leader.
*/
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime,
+ TG_CPUCLOCK_UTIME | TG_CPUCLOCK_STIME);
spin_lock_irq(&p->parent->sighand->siglock);
psig = p->parent->signal;
sig = p->signal;
diff --git a/kernel/fork.c b/kernel/fork.c
index bb762b4..4229223 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -802,7 +802,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
sig->cputime_expires.prof_exp =
secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
- sig->cputimer.running = 1;
+ sig->cputimer.running = TG_CPUCLOCK_SCHED;
}
/* The timer lists. */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 58762f7..0e29bd6 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime cputime;
cputime_t utime;
- thread_group_cputimer(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime, TG_CPUCLOCK_UTIME);
utime = cputime.utime;
if (cputime_le(cval, utime)) { /* about to fire */
cval = jiffies_to_cputime(1);
@@ -82,7 +82,9 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime times;
cputime_t ptime;
- thread_group_cputimer(tsk, ×);
+ thread_group_cputimer(tsk, ×,
+ TG_CPUCLOCK_UTIME |
+ TG_CPUCLOCK_STIME);
ptime = cputime_add(times.utime, times.stime);
if (cputime_le(cval, ptime)) { /* about to fire */
cval = jiffies_to_cputime(1);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1d9cdf3..b5f1b44 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,7 +230,8 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
return 0;
}
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times,
+ int mask)
{
struct sighand_struct *sighand;
struct signal_struct *sig;
@@ -247,16 +248,22 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
t = tsk;
do {
- times->utime = cputime_add(times->utime, t->utime);
- times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += t->se.sum_exec_runtime;
+ if (mask & TG_CPUCLOCK_UTIME)
+ times->utime = cputime_add(times->utime, t->utime);
+ if (mask & TG_CPUCLOCK_STIME)
+ times->stime = cputime_add(times->stime, t->stime);
+ if (mask & TG_CPUCLOCK_SCHED)
+ times->sum_exec_runtime += t->se.sum_exec_runtime;
t = next_thread(t);
} while (t != tsk);
- times->utime = cputime_add(times->utime, sig->utime);
- times->stime = cputime_add(times->stime, sig->stime);
- times->sum_exec_runtime += sig->sum_sched_runtime;
+ if (mask & TG_CPUCLOCK_UTIME)
+ times->utime = cputime_add(times->utime, sig->utime);
+ if (mask & TG_CPUCLOCK_STIME)
+ times->stime = cputime_add(times->stime, sig->stime);
+ if (mask & TG_CPUCLOCK_SCHED)
+ times->sum_exec_runtime += sig->sum_sched_runtime;
out:
rcu_read_unlock();
}
@@ -273,22 +280,25 @@ static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
a->sum_exec_runtime = b->sum_exec_runtime;
}
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times,
+ int mask)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
struct task_cputime sum;
unsigned long flags;
+ int new_clocks;
spin_lock_irqsave(&cputimer->lock, flags);
- if (!cputimer->running) {
- cputimer->running = 1;
+ if ((cputimer->running & mask) != mask) {
+ new_clocks = (cputimer->running ^ mask) & mask;
+ cputimer->running |= mask;
/*
* The POSIX timer interface allows for absolute time expiry
* values through the TIMER_ABSTIME flag, therefore we have
* to synchronize the timer to the clock every time we start
* it.
*/
- thread_group_cputime(tsk, &sum);
+ thread_group_cputime(tsk, &sum, new_clocks);
update_gt_cputime(&cputimer->cputime, &sum);
}
*times = cputimer->cputime;
@@ -309,11 +319,12 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
default:
return -EINVAL;
case CPUCLOCK_PROF:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime,
+ TG_CPUCLOCK_STIME | TG_CPUCLOCK_UTIME);
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime, TG_CPUCLOCK_UTIME);
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
@@ -523,7 +534,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct task_cputime cputime;
- thread_group_cputimer(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime, TG_CPUCLOCK_ALL);
cleanup_timers(tsk->signal->cpu_timers,
cputime.utime, cputime.stime, cputime.sum_exec_runtime);
}
@@ -691,17 +702,20 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
{
struct task_cputime cputime;
- thread_group_cputimer(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
+ thread_group_cputimer(p, &cputime,
+ TG_CPUCLOCK_UTIME | TG_CPUCLOCK_STIME);
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
break;
case CPUCLOCK_VIRT:
+ thread_group_cputimer(p, &cputime, TG_CPUCLOCK_UTIME);
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
+ thread_group_cputimer(p, &cputime, TG_CPUCLOCK_SCHED);
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
break;
}
@@ -1101,7 +1115,7 @@ static void check_process_timers(struct task_struct *tsk,
/*
* Collect the current process totals.
*/
- thread_group_cputimer(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime, TG_CPUCLOCK_ALL);
utime = cputime.utime;
ptime = cputime_add(utime, cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1367,7 +1381,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;
- thread_group_cputimer(tsk, &group_sample);
+ thread_group_cputimer(tsk, &group_sample, TG_CPUCLOCK_ALL);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 14c447a..fe6e0ac 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4828,7 +4828,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
u64 ns;
rq = task_rq_lock(p, &flags);
- thread_group_cputime(p, &totals);
+ thread_group_cputime(p, &totals, TG_CPUCLOCK_SCHED);
ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 32d2bd4..9156e44 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -303,7 +303,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
cputimer = &tsk->signal->cputimer;
- if (!cputimer->running)
+ if (!(cputimer->running & TG_CPUCLOCK_UTIME))
return;
spin_lock(&cputimer->lock);
@@ -333,7 +333,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
cputimer = &tsk->signal->cputimer;
- if (!cputimer->running)
+ if (!(cputimer->running & TG_CPUCLOCK_STIME))
return;
spin_lock(&cputimer->lock);
@@ -366,7 +366,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
cputimer = &sig->cputimer;
- if (!cputimer->running)
+ if (!(cputimer->running & TG_CPUCLOCK_SCHED))
return;
spin_lock(&cputimer->lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 0805d08..69d64f0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -914,7 +914,8 @@ void do_sys_times(struct tms *tms)
cputime_t cutime, cstime;
spin_lock_irq(¤t->sighand->siglock);
- thread_group_cputime(current, &cputime);
+ thread_group_cputime(current, &cputime,
+ TG_CPUCLOCK_UTIME | TG_CPUCLOCK_STIME);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
spin_unlock_irq(¤t->sighand->siglock);
@@ -1650,7 +1651,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
break;
case RUSAGE_SELF:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime(p, &cputime,
+ TG_CPUCLOCK_UTIME |
+ TG_CPUCLOCK_STIME);
utime = cputime_add(utime, cputime.utime);
stime = cputime_add(stime, cputime.stime);
r->ru_nvcsw += p->signal->nvcsw;
--
1.6.0.6
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime()
2009-06-12 10:39 [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime() Stanislaw Gruszka
@ 2009-06-12 11:09 ` Peter Zijlstra
2009-06-12 11:20 ` Stanislaw Gruszka
0 siblings, 1 reply; 5+ messages in thread
From: Peter Zijlstra @ 2009-06-12 11:09 UTC (permalink / raw)
To: Stanislaw Gruszka
Cc: Thomas Gleixner, linux-kernel, Oleg Nesterov, Ingo Molnar
On Fri, 2009-06-12 at 12:39 +0200, Stanislaw Gruszka wrote:
> - times->utime = cputime_add(times->utime, t->utime);
> - times->stime = cputime_add(times->stime, t->stime);
> - times->sum_exec_runtime += t->se.sum_exec_runtime;
> + if (mask & TG_CPUCLOCK_UTIME)
> + times->utime = cputime_add(times->utime, t->utime);
> + if (mask & TG_CPUCLOCK_STIME)
> + times->stime = cputime_add(times->stime, t->stime);
> + if (mask & TG_CPUCLOCK_SCHED)
> + times->sum_exec_runtime += t->se.sum_exec_runtime;
Does adding 3 branches really make it faster? Since you're bound to want
at least one, I would expect the cacheline to be hot (assuming all three
variables live in the same cacheline -- if not, they should be!), so all
you're avoiding is the addition.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime()
2009-06-12 11:09 ` Peter Zijlstra
@ 2009-06-12 11:20 ` Stanislaw Gruszka
2009-06-12 14:25 ` Oleg Nesterov
0 siblings, 1 reply; 5+ messages in thread
From: Stanislaw Gruszka @ 2009-06-12 11:20 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Thomas Gleixner, linux-kernel, Oleg Nesterov, Ingo Molnar
On Fri, 12 Jun 2009 13:09:46 +0200
Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Fri, 2009-06-12 at 12:39 +0200, Stanislaw Gruszka wrote:
> > - times->utime = cputime_add(times->utime, t->utime);
> > - times->stime = cputime_add(times->stime, t->stime);
> > - times->sum_exec_runtime += t->se.sum_exec_runtime;
> > + if (mask & TG_CPUCLOCK_UTIME)
> > + times->utime = cputime_add(times->utime, t->utime);
> > + if (mask & TG_CPUCLOCK_STIME)
> > + times->stime = cputime_add(times->stime, t->stime);
> > + if (mask & TG_CPUCLOCK_SCHED)
> > + times->sum_exec_runtime += t->se.sum_exec_runtime;
>
> Does adding 3 branches really make it faster?
Actually I have not done any benchmarking yet, so I don't know what the real
impact of the patch is. I hope it makes things faster but the result can be
the opposite of my expectations.
> Since you're bound to want
> at least one, I would expect the cacheline to be hot (assuming all three
> variables live in the same cacheline -- if not, they should be!), so all
> you're avoiding is the addition.
utime and stime are probably in the same cache line; se.sum_exec_runtime is far away
from them in struct task_struct, so it is likely in a separate cache line.
Stanislaw
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime()
2009-06-12 11:20 ` Stanislaw Gruszka
@ 2009-06-12 14:25 ` Oleg Nesterov
2009-06-15 8:55 ` Stanislaw Gruszka
0 siblings, 1 reply; 5+ messages in thread
From: Oleg Nesterov @ 2009-06-12 14:25 UTC (permalink / raw)
To: Stanislaw Gruszka
Cc: Peter Zijlstra, Thomas Gleixner, linux-kernel, Ingo Molnar
To clarify, I am not arguing against this patch, just a question.
On 06/12, Stanislaw Gruszka wrote:
>
> On Fri, 12 Jun 2009 13:09:46 +0200
> Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > On Fri, 2009-06-12 at 12:39 +0200, Stanislaw Gruszka wrote:
> > > - times->utime = cputime_add(times->utime, t->utime);
> > > - times->stime = cputime_add(times->stime, t->stime);
> > > - times->sum_exec_runtime += t->se.sum_exec_runtime;
> > > + if (mask & TG_CPUCLOCK_UTIME)
> > > + times->utime = cputime_add(times->utime, t->utime);
> > > + if (mask & TG_CPUCLOCK_STIME)
> > > + times->stime = cputime_add(times->stime, t->stime);
> > > + if (mask & TG_CPUCLOCK_SCHED)
> > > + times->sum_exec_runtime += t->se.sum_exec_runtime;
> >
> > Does adding 3 branches really make it faster?
> Actually I did not any benchmarking yet, so I don't know what is the real
> impact of the patch. I hope it make things taster but the result can be
> opposite from my expectations.
I agree with Peter, if we complicate the code it would be nice to know
this really makes it faster. Besides, thread_group_cputime() should not
be called that often.
Perhaps it makes sense to turn ->running into bitmask though, this should
"obviously" speed up account_group_xxx() helpers.
But in that case, perhaps stop_process_timers() should accept bitmask too?
otherwise this doesn't look "complete".
Oleg.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime()
2009-06-12 14:25 ` Oleg Nesterov
@ 2009-06-15 8:55 ` Stanislaw Gruszka
0 siblings, 0 replies; 5+ messages in thread
From: Stanislaw Gruszka @ 2009-06-15 8:55 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: Peter Zijlstra, Thomas Gleixner, linux-kernel, Ingo Molnar
On Fri, 12 Jun 2009 16:25:46 +0200
Oleg Nesterov <oleg@redhat.com> wrote:
> To clarify, I am not arguing against this patch, just a queston.
>
> On 06/12, Stanislaw Gruszka wrote:
> >
> > On Fri, 12 Jun 2009 13:09:46 +0200
> > Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > On Fri, 2009-06-12 at 12:39 +0200, Stanislaw Gruszka wrote:
> > > > - times->utime = cputime_add(times->utime, t->utime);
> > > > - times->stime = cputime_add(times->stime, t->stime);
> > > > - times->sum_exec_runtime += t->se.sum_exec_runtime;
> > > > + if (mask & TG_CPUCLOCK_UTIME)
> > > > + times->utime = cputime_add(times->utime, t->utime);
> > > > + if (mask & TG_CPUCLOCK_STIME)
> > > > + times->stime = cputime_add(times->stime, t->stime);
> > > > + if (mask & TG_CPUCLOCK_SCHED)
> > > > + times->sum_exec_runtime += t->se.sum_exec_runtime;
> > >
> > > Does adding 3 branches really make it faster?
> > Actually I did not any benchmarking yet, so I don't know what is the real
> > impact of the patch. I hope it make things taster but the result can be
> > opposite from my expectations.
>
> I agree with Peter, if we complicate the code it would be nice to know
> this really makes it faster. Besides, thread_group_cputime() should not
> be called that often.
I did some benchmarking; I no longer think the patch is good, as it does not get
the speed-up I expected for clock_gettime().
First was a measurement of thread_group_cputime() [1] with standard usage, when it is
called from posix_cpu_timers_exit_group():
Without patch:
tg_cputime_cycles 1524672
tg_cputime_n 6246
average: 244 cycles
With patch:
tg_cputime_cycles 1593876
tg_cputime_n 6368
average: 250 cycles
Second benchmark measure time of 250 thread process which do lots of
clock_gettime(), here are results:
Without patch:
[stasiu@dhcp-lab-195 Work]$ ./bench_getclock
CLK_PROF: 0.387411
CLK_VIRT: 0.371487
CLK_SCHED: 0.396650
With patch:
[stasiu@dhcp-lab-195 Work]$ ./bench_getclock
CLK_PROF: 0.336056
CLK_VIRT: 0.305781
CLK_SCHED: 0.339739
[1] Patch to measure thread_group_cputime() cycles:
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81e4eb6..14021e8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,6 +22,9 @@
#define arch_idle_time(cpu) 0
#endif
+extern unsigned long long tg_cputime_cycles;
+extern unsigned long tg_cputime_n;
+
static int show_stat(struct seq_file *p, void *v)
{
int i, j;
@@ -108,12 +111,16 @@ static int show_stat(struct seq_file *p, void *v)
"btime %lu\n"
"processes %lu\n"
"procs_running %lu\n"
- "procs_blocked %lu\n",
+ "procs_blocked %lu\n"
+ "tg_cputime_cycles %llu\n"
+ "tg_cputime_n %lu\n",
nr_context_switches(),
(unsigned long)jif,
total_forks,
nr_running(),
- nr_iowait());
+ nr_iowait(),
+ tg_cputime_cycles,
+ tg_cputime_n);
return 0;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b5f1b44..6ff946c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,13 +230,20 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
return 0;
}
+unsigned long long tg_cputime_cycles;
+unsigned long tg_cputime_n;
+
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times,
int mask)
{
struct sighand_struct *sighand;
struct signal_struct *sig;
struct task_struct *t;
+ unsigned long long xstart, xend;
+ xstart = get_cycles();
+ barrier();
+
*times = INIT_CPUTIME;
rcu_read_lock();
@@ -266,6 +273,11 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times,
times->sum_exec_runtime += sig->sum_sched_runtime;
out:
rcu_read_unlock();
+
+ barrier();
+ xend = get_cycles();
+ tg_cputime_cycles += xend - xstart;
+ tg_cputime_n++;
}
static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
[2] Benchmark multithread process clock_gettime():
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <unistd.h>
#define CPUCLOCK_PROF 0
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define CLK_PROF MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_PROF)
#define CLK_VIRT MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_VIRT)
#define CLK_SCHED MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define NTHREADS 250
/*
 * Idle thread body: park the thread forever so the extra threads only
 * lengthen the thread-group walk in thread_group_cputime(), without
 * accumulating meaningful CPU time themselves.
 */
void *thread_main(void *dummy)
{
	(void)dummy;	/* unused; silences -Wunused-parameter */
	for (;;)
		sleep(100);
	return NULL;	/* not reached; keeps the non-void function well-formed */
}
/*
 * Spawn NTHREADS background threads running thread_main().  The threads
 * are intentionally never joined; tid is reused for each create and only
 * the pthread_create() return code is checked.  Exits the process on the
 * first creation failure.
 */
void create_threads(void)
{
	int i, rc;
	pthread_t tid;
	pthread_attr_t attr;

	pthread_attr_init(&attr);
	for (i = 0; i < NTHREADS; i++) {
		rc = pthread_create(&tid, &attr, thread_main, NULL);
		if (rc) {
			/* diagnostics belong on stderr, not stdout */
			fprintf(stderr,
				"ERROR; return code from pthread_create() is %d\n",
				rc);
			exit(-1);
		}
	}
	pthread_attr_destroy(&attr);	/* release attr resources (was leaked) */
}
/*
 * Elapsed time from *start to *end, in seconds, returned as a double
 * with the microsecond component folded into the fractional part.
 */
double timeval_diff(const struct timeval *start, const struct timeval *end)
{
	double secs = end->tv_sec - start->tv_sec;
	double frac = (end->tv_usec - start->tv_usec) / 1000000.0;

	return secs + frac;
}
/*
 * Time 100000 clock_gettime() calls on clock 'cid' and print the total
 * elapsed wall-clock time in seconds, labelled with 'str'.
 */
void bench(clockid_t cid, char *str)
{
int i;
struct timespec ts;
struct timeval start, end;
double t;
/* wall-clock timestamps bracket the measured loop */
gettimeofday(&start, NULL);
for (i = 0; i < 100000; i++)
clock_gettime(cid, &ts); /* value read into ts is deliberately discarded */
gettimeofday(&end, NULL);
t = timeval_diff(&start, &end);
printf("%s: %f\n", str, t);
}
int main(void)
{
create_threads();
bench(CLK_PROF, "CLK_PROF");
bench(CLK_VIRT, "CLK_VIRT");
bench(CLK_SCHED, "CLK_SCHED");
return 0;
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-06-15 9:02 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-12 10:39 [RFC PATCH] posix-cpu-timers: optimize calling thread_group_cputime() Stanislaw Gruszka
2009-06-12 11:09 ` Peter Zijlstra
2009-06-12 11:20 ` Stanislaw Gruszka
2009-06-12 14:25 ` Oleg Nesterov
2009-06-15 8:55 ` Stanislaw Gruszka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).