* [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-02-20 7:49 Dmitry Antipov
2012-04-05 0:10 ` Andrew Morton
0 siblings, 1 reply; 8+ messages in thread
From: Dmitry Antipov @ 2012-02-20 7:49 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, linaro-dev, patches, Dmitry Antipov
This patch proposes a system-wide sysctl-aware default for the
high-resolution timer slack value, which may be changed from 0
to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
values are HRTIMER_DEFAULT_SLACK. Per-task value isn't inherited
across fork(); instead, newborn task uses system-wide value by
default, and newborn thread uses its group leader's value.
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/sysctl/kernel.txt | 8 ++++++++
include/linux/hrtimer.h | 11 +++++++++++
include/linux/init_task.h | 2 +-
include/linux/sched.h | 11 ++++++++---
kernel/fork.c | 9 +++++++--
kernel/futex.c | 4 ++--
kernel/hrtimer.c | 10 +++++++---
kernel/sys.c | 9 ++++-----
kernel/sysctl.c | 10 ++++++++++
9 files changed, 58 insertions(+), 16 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d78841..83b63ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -606,6 +606,14 @@ can be ORed together:
==============================================================
+timer_slack:
+
+This value can be used to query and set the default slack for
+high-resolution timers, in nanoseconds. The default value is 50
+microseconds, and can be changed from 0 nanoseconds to 1 millisecond.
+
+==============================================================
+
unknown_nmi_panic:
The value in this file affects behavior of handling NMI. When the
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..77169b7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -24,6 +24,16 @@
#include <linux/timer.h>
#include <linux/timerqueue.h>
+/*
+ * Default system-wide and per-task hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_DEFAULT_SLACK 50000
+
+/*
+ * Reasonable limit for hrtimer slack.
+ */
+#define HRTIMER_MAX_SLACK 1000000
+
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -323,6 +333,7 @@ extern ktime_t ktime_get_monotonic_offset(void);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+extern int default_timer_slack_ns;
/* Exported timer functions: */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1a..b29be0d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -178,7 +178,7 @@ extern struct cred init_cred;
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
- .timer_slack_ns = 50000, /* 50 usec default slack */ \
+ .timer_slack_ns = HRTIMER_DEFAULT_SLACK, \
.pids = { \
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9b13f79..811e034 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,11 +1551,11 @@ struct task_struct {
struct latency_record latency_record[LT_SAVECOUNT];
#endif
/*
- * time slack values; these are used to round up poll() and
- * select() etc timeout values. These are in nanoseconds.
+ * High-resolution timer slack value, in nanoseconds.
+ * Used to round up poll()/select(), nanosleep, futex
+ * waiting, etc. timeout values of non-realtime tasks.
*/
unsigned long timer_slack_ns;
- unsigned long default_timer_slack_ns;
struct list_head *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2631,6 +2631,11 @@ static inline int spin_needbreak(spinlock_t *lock)
#endif
}
+static inline unsigned long task_timer_slack(struct task_struct *tsk)
+{
+ return rt_task(tsk) ? 0 : tsk->timer_slack_ns;
+}
+
/*
* Thread group CPU time accounting.
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index b77fd55..6aaff93 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1164,8 +1164,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
-
- p->default_timer_slack_ns = current->timer_slack_ns;
+ /*
+ * New thread inherits the slack from the group
+ * leader. New process uses system-default slack.
+ */
+ p->timer_slack_ns = (clone_flags & CLONE_THREAD) ?
+ current->group_leader->timer_slack_ns :
+ default_timer_slack_ns;
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 1614be2..a0d302d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1887,7 +1887,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
+ task_timer_slack(current));
}
retry:
@@ -2281,7 +2281,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
+ task_timer_slack(current));
}
/*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf5..0c56fec 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -51,6 +51,12 @@
#include <trace/events/timer.h>
/*
+ * Default hrtimer slack value, in nanoseconds. May be changed in
+ * [0..HRTIMER_MAX_SLACK] range through kernel.timer_slack sysctl.
+ */
+__read_mostly int default_timer_slack_ns = HRTIMER_DEFAULT_SLACK;
+
+/*
* The timer bases:
*
* There are more clockids then hrtimer bases. Thus, we index
@@ -1564,9 +1570,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
int ret = 0;
unsigned long slack;
- slack = current->timer_slack_ns;
- if (rt_task(current))
- slack = 0;
+ slack = task_timer_slack(current);
hrtimer_init_on_stack(&t.timer, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
diff --git a/kernel/sys.c b/kernel/sys.c
index 4070153..ac32846 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -22,6 +22,7 @@
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
+#include <linux/hrtimer.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
@@ -1917,12 +1918,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = current->timer_slack_ns;
break;
case PR_SET_TIMERSLACK:
- if (arg2 <= 0)
- current->timer_slack_ns =
- current->default_timer_slack_ns;
- else
+ if (arg2 <= HRTIMER_MAX_SLACK)
current->timer_slack_ns = arg2;
- error = 0;
+ else
+ error = -EINVAL;
break;
case PR_MCE_KILL:
if (arg4 | arg5)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f487f25..2cd42c6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,6 +136,7 @@ static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
+static const int slack_max = HRTIMER_MAX_SLACK;
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
@@ -1004,6 +1005,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+ {
+ .procname = "timer_slack",
+ .data = &default_timer_slack_ns,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &slack_max,
+ },
{ }
};
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-05 0:10 ` Andrew Morton
0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2012-04-05 0:10 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Thomas Gleixner, linux-kernel, linaro-dev, patches, linux-man
On Mon, 20 Feb 2012 11:49:32 +0400
Dmitry Antipov <dmitry.antipov@linaro.org> wrote:
> This patch proposes a system-wide sysctl-aware default for the
> high-resolution timer slack value, which may be changed from 0
> to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
> values are HRTIMER_DEFAULT_SLACK. Per-task value isn't inherited
> across fork(); instead, newborn task uses system-wide value by
> default, and newborn thread uses it's group leader value.
Well.. there are some back-incompatibilities here.
prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
whatever-we-inherited-at-fork, but that has been removed. What are the
implications of this, and did we need to do it?
If we do make changes in this area then the prctl manpage should be
updated, please. And if
http://www.spinics.net/lists/linux-man/msg01149.html represents the
current state of that manpage then it should be updated anyway - that
entry doesn't say anything about the (arg2 <= 0) case.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-05 0:10 ` Andrew Morton
0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2012-04-05 0:10 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Thomas Gleixner, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linaro-dev-cunTk1MwBs8s++Sfvej+rw,
patches-QSEj5FYQhm4dnm+yROfE0A, linux-man-u79uwXL29TY76Z2rM5mHXA
On Mon, 20 Feb 2012 11:49:32 +0400
Dmitry Antipov <dmitry.antipov-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org> wrote:
> This patch proposes a system-wide sysctl-aware default for the
> high-resolution timer slack value, which may be changed from 0
> to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
> values are HRTIMER_DEFAULT_SLACK. Per-task value isn't inherited
> across fork(); instead, newborn task uses system-wide value by
> default, and newborn thread uses it's group leader value.
Well.. there are some back-incompatibilities here.
prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
whatever-we-inherited-at-fork, but that has been removed. What are the
implications of this, and did we need to do it?
If we do make changes in this area then the prctl manpage should be
updated, please. And if
http://www.spinics.net/lists/linux-man/msg01149.html represents the
current state of that manpage then it should be updated anyway - that
entry doesn't say anything about the (arg2 <= 0) case.
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-06 9:14 ` Dmitry Antipov
0 siblings, 0 replies; 8+ messages in thread
From: Dmitry Antipov @ 2012-04-06 9:14 UTC (permalink / raw)
To: Andrew Morton
Cc: Thomas Gleixner, linux-kernel, linaro-dev, patches, linux-man
On 04/05/2012 04:10 AM, Andrew Morton wrote:
> Well.. there are some back-incompatibilities here.
> prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
> whatever-we-inherited-at-fork, but that has been removed. What are the
> implications of this, and did we need to do it?
It seems you're looking at the previous version of this patch
(http://lkml.org/lkml/2012/2/20/55). Latest proposal is
http://lwn.net/Articles/484162/, which defines PR_SET_TIMERSLACK
action as:
...
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
default_timer_slack_ns;
else if (arg2 <= HRTIMER_MAX_SLACK)
current->timer_slack_ns = arg2;
else
error = -EINVAL;
break;
...
> If we do make changes in this area then the prctl manpage should be
> updated, please. And if
> http://www.spinics.net/lists/linux-man/msg01149.html represents the
> current state of that manpage then it should be updated anyway - that
> entry doesn't say anything about the (arg2<= 0) case.
I sent a patch for man pages too, it should be one of the recent posts
at http://www.spinics.net/lists/linux-man/index.html.
Dmitry
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-06 9:14 ` Dmitry Antipov
0 siblings, 0 replies; 8+ messages in thread
From: Dmitry Antipov @ 2012-04-06 9:14 UTC (permalink / raw)
To: Andrew Morton
Cc: Thomas Gleixner, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linaro-dev-cunTk1MwBs8s++Sfvej+rw,
patches-QSEj5FYQhm4dnm+yROfE0A, linux-man-u79uwXL29TY76Z2rM5mHXA
On 04/05/2012 04:10 AM, Andrew Morton wrote:
> Well.. there are some back-incompatibilities here.
> prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
> whatever-we-inherited-at-fork, but that has been removed. What are the
> implications of this, and did we need to do it?
It seems you're looking at the previous version of this patch
(http://lkml.org/lkml/2012/2/20/55). Latest proposal is
http://lwn.net/Articles/484162/, which defines PR_SET_TIMERSLACK
action as:
...
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
default_timer_slack_ns;
else if (arg2 <= HRTIMER_MAX_SLACK)
current->timer_slack_ns = arg2;
else
error = -EINVAL;
break;
...
> If we do make changes in this area then the prctl manpage should be
> updated, please. And if
> http://www.spinics.net/lists/linux-man/msg01149.html represents the
> current state of that manpage then it should be updated anyway - that
> entry doesn't say anything about the (arg2<= 0) case.
I sent a patch for man pages too, it should be one of the recent posts
at http://www.spinics.net/lists/linux-man/index.html.
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-24 22:06 ` Michael Kerrisk
0 siblings, 0 replies; 8+ messages in thread
From: Michael Kerrisk @ 2012-04-24 22:06 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Andrew Morton, Thomas Gleixner, linux-kernel, linaro-dev,
patches, linux-man
Dmitry,
On Fri, Apr 6, 2012 at 9:14 PM, Dmitry Antipov
<dmitry.antipov@linaro.org> wrote:
> On 04/05/2012 04:10 AM, Andrew Morton wrote:
>
>> Well.. there are some back-incompatibilities here.
>> prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
>> whatever-we-inherited-at-fork, but that has been removed. What are the
>> implications of this, and did we need to do it?
>
>
> It seems you're looking at the previous version of this patch
> (http://lkml.org/lkml/2012/2/20/55). Latest proposal is
> http://lwn.net/Articles/484162/, which defines PR_SET_TIMERSLACK
> action as:
> ...
> case PR_SET_TIMERSLACK:
> if (arg2 <= 0)
> current->timer_slack_ns =
> default_timer_slack_ns;
> else if (arg2 <= HRTIMER_MAX_SLACK)
> current->timer_slack_ns = arg2;
> else
> error = -EINVAL;
> break;
> ...
>
>
>> If we do make changes in this area then the prctl manpage should be
>> updated, please. And if
>> http://www.spinics.net/lists/linux-man/msg01149.html represents the
>> current state of that manpage then it should be updated anyway - that
>> entry doesn't say anything about the (arg2<= 0) case.
>
>
> I sent a patch for man pages too, it should be one of the recent posts
> at http://www.spinics.net/lists/linux-man/index.html.
Your response didn't actually address Andrew's point. Your patch
changes user-visible semantics that have been in place since kernel
2.6.28. Specifically:
* The meaning of prctl(PR_SET_TIMERSLACK, n) changes
for the n<=0 case (formerly, this reverted the timer slack
to the per-process "default"; with the proposed patch, it
reverts the timer slack to a system-wide default).
* The semantics of setting the timer slack of a new thread
have changed.
Perhaps these changes are warranted/necessary, but they *are* ABI
changes, and so should be carefully explained and well justified.
Thanks,
Michael
PS As background to the discussion, here's the current draft of some
text I plan to add to prctl(2) that explains the current semantics,
which would change with Dmitry's patch:
prctl(2):
PR_SET_TIMERSLACK (since Linux 2.6.28)
Set the timer slack for the calling thread to the value in
arg2. The timer slack is a value, expressed in nanoseconds,
that is used by the kernel to group timer expirations for
this thread that are close to one another; as a consequence,
timer expirations for this thread may be up to the specified
number of nanoseconds late (but will never expire early).
Grouping timer expirations can help reduce system power con‐
sumption by minimizing CPU wake-ups.
The timer expirations affected by timer slack are those set
by select(2), pselect(2), poll(2), ppoll(2), epoll_wait(2),
epoll_pwait(2), clock_nanosleep(2), nanosleep(2), and
futex(2) (and thus the library functions implemented via
futexes: pthread_cond_timedwait(3), pthread_rwlock_timedrd‐
lock(3), pthread_rwlock_timedwrlock(3), and sem_wait(3)).
Each thread has two associated timer slack values: a
"default" value, and a "current" value. The "current" value
is the one that governs grouping of timer expirations. When
a new thread is created, the two timer slack values are made
the same as the "current" value of the creating thread.
Thereafter, a thread can adjust its timer slack value via
PR_SET_TIMERSLACK: if arg2 is greater than zero, then it
specifies a new value for the "current" timer slack for the
calling thread; if arg2 is less than or equal to zero, then
the "current" timer slack is set to the "default" value.
The timer slack value of init (PID 1), the ancestor of all
threads, is 50,000 nanoseconds (50 microseconds).
fork(2):
* The "default" timer slack of the child is set to the value of
the "current" timer slack of the parent. (See the description
of PR_SET_TIMERSLACK on prctl(2).)
--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface", http://blog.man7.org/
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-04-24 22:06 ` Michael Kerrisk
0 siblings, 0 replies; 8+ messages in thread
From: Michael Kerrisk @ 2012-04-24 22:06 UTC (permalink / raw)
To: Dmitry Antipov
Cc: Andrew Morton, Thomas Gleixner,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linaro-dev-cunTk1MwBs8s++Sfvej+rw,
patches-QSEj5FYQhm4dnm+yROfE0A, linux-man-u79uwXL29TY76Z2rM5mHXA
Dmitry,
On Fri, Apr 6, 2012 at 9:14 PM, Dmitry Antipov
<dmitry.antipov-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org> wrote:
> On 04/05/2012 04:10 AM, Andrew Morton wrote:
>
>> Well.. there are some back-incompatibilities here.
>> prctl(PR_SET_TIMERSLACK, -1) used to restore current's slack setting to
>> whatever-we-inherited-at-fork, but that has been removed. What are the
>> implications of this, and did we need to do it?
>
>
> It seems you're looking at the previous version of this patch
> (http://lkml.org/lkml/2012/2/20/55). Latest proposal is
> http://lwn.net/Articles/484162/, which defines PR_SET_TIMERSLACK
> action as:
> ...
> case PR_SET_TIMERSLACK:
> if (arg2 <= 0)
> current->timer_slack_ns =
> default_timer_slack_ns;
> else if (arg2 <= HRTIMER_MAX_SLACK)
> current->timer_slack_ns = arg2;
> else
> error = -EINVAL;
> break;
> ...
>
>
>> If we do make changes in this area then the prctl manpage should be
>> updated, please. And if
>> http://www.spinics.net/lists/linux-man/msg01149.html represents the
>> current state of that manpage then it should be updated anyway - that
>> entry doesn't say anything about the (arg2<= 0) case.
>
>
> I sent a patch for man pages too, it should be one of the recent posts
> at http://www.spinics.net/lists/linux-man/index.html.
Your response didn't actually address Andrew's point. Your patch
changes user-visible semantics that have been in place since kernel
2.6.28. Specifically:
* The meaning of prctl(PR_SET_TIMERSLACK, n) changes
for the n<=0 case (formerly, this reverted the timer slack
to the per-process "default"; with the proposed patch, it
reverts the timer slack to a system-wide default).
* The semantics of setting the timer slack of a new thread
have changed.
Perhaps these changes are warranted/necessary, but they *are* ABI
changes, and so should be carefully explained and well justified.
Thanks,
Michael
PS As background to the discussion, here's the current draft of some
text I plan to add to prctl(2) that explains the current semantics,
which would change with Dmitry's patch:
prctl(2):
PR_SET_TIMERSLACK (since Linux 2.6.28)
Set the timer slack for the calling thread to the value in
arg2. The timer slack is a value, expressed in nanoseconds,
that is used by the kernel to group timer expirations for
this thread that are close to one another; as a consequence,
timer expirations for this thread may be up to the specified
number of nanoseconds late (but will never expire early).
Grouping timer expirations can help reduce system power con‐
sumption by minimizing CPU wake-ups.
The timer expirations affected by timer slack are those set
by select(2), pselect(2), poll(2), ppoll(2), epoll_wait(2),
epoll_pwait(2), clock_nanosleep(2), nanosleep(2), and
futex(2) (and thus the library functions implemented via
futexes: pthread_cond_timedwait(3), pthread_rwlock_timedrd‐
lock(3), pthread_rwlock_timedwrlock(3), and sem_wait(3)).
Each thread has two associated timer slack values: a
"default" value, and a "current" value. The "current" value
is the one that governs grouping of timer expirations. When
a new thread is created, the two timer slack values are made
the same as the "current" value of the creating thread.
Thereafter, a thread can adjust its timer slack value via
PR_SET_TIMERSLACK: if arg2 is greater than zero, then it
specifies a new value for the "current" timer slack for the
calling thread; if arg2 is less than or equal to zero, then
the "current" timer slack is set to the "default" value.
The timer slack value of init (PID 1), the ancestor of all
threads, is 50,000 nanoseconds (50 microseconds).
fork(2):
* The "default" timer slack of the child is set to the value of
the "current" timer slack of the parent. (See the description
of PR_SET_TIMERSLACK on prctl(2).)
--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface", http://blog.man7.org/
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
@ 2012-02-28 8:46 Dmitry Antipov
0 siblings, 0 replies; 8+ messages in thread
From: Dmitry Antipov @ 2012-02-28 8:46 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Ingo Molnar, Peter Zijlstra, John Stultz, linux-kernel,
linaro-dev, patches, Dmitry Antipov
This patch proposes a system-wide sysctl-aware default for the
high-resolution timer slack value, which may be changed from 0
to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
values are HRTIMER_DEFAULT_SLACK. Per-task value isn't inherited
across fork(); instead, newborn task uses system-wide value by
default, and newborn thread uses its group leader's value.
Signed-off-by: Dmitry Antipov <dmitry.antipov@linaro.org>
---
Documentation/sysctl/kernel.txt | 8 ++++++++
include/linux/hrtimer.h | 11 +++++++++++
include/linux/init_task.h | 2 +-
include/linux/sched.h | 11 ++++++++---
kernel/fork.c | 9 +++++++--
kernel/futex.c | 4 ++--
kernel/hrtimer.c | 10 +++++++---
kernel/sys.c | 8 +++++---
kernel/sysctl.c | 10 ++++++++++
9 files changed, 59 insertions(+), 14 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d78841..83b63ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -606,6 +606,14 @@ can be ORed together:
==============================================================
+timer_slack:
+
+This value can be used to query and set the default slack for
+high-resolution timers, in nanoseconds. The default value is 50
+microseconds, and can be changed from 0 nanoseconds to 1 millisecond.
+
+==============================================================
+
unknown_nmi_panic:
The value in this file affects behavior of handling NMI. When the
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..b9da137 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -24,6 +24,16 @@
#include <linux/timer.h>
#include <linux/timerqueue.h>
+/*
+ * Default system-wide and per-task hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_DEFAULT_SLACK 50000
+
+/*
+ * Reasonable limit for hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_MAX_SLACK 1000000
+
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -323,6 +333,7 @@ extern ktime_t ktime_get_monotonic_offset(void);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+extern int default_timer_slack_ns;
/* Exported timer functions: */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1a..b29be0d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -178,7 +178,7 @@ extern struct cred init_cred;
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
- .timer_slack_ns = 50000, /* 50 usec default slack */ \
+ .timer_slack_ns = HRTIMER_DEFAULT_SLACK, \
.pids = { \
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..aa0a806 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,11 +1551,11 @@ struct task_struct {
struct latency_record latency_record[LT_SAVECOUNT];
#endif
/*
- * time slack values; these are used to round up poll() and
- * select() etc timeout values. These are in nanoseconds.
+ * High-resolution timer slack value, in nanoseconds.
+ * Used to round up poll()/select(), nanosleep, futex
+ * waiting, etc. timeout values of non-realtime tasks.
*/
unsigned long timer_slack_ns;
- unsigned long default_timer_slack_ns;
struct list_head *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2628,6 +2628,11 @@ static inline int spin_needbreak(spinlock_t *lock)
#endif
}
+static inline unsigned long task_timer_slack(struct task_struct *tsk)
+{
+ return rt_task(tsk) ? 0 : tsk->timer_slack_ns;
+}
+
/*
* Thread group CPU time accounting.
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index e2cd3e2..0f9a983 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1167,8 +1167,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
-
- p->default_timer_slack_ns = current->timer_slack_ns;
+ /*
+ * New thread inherits the slack from the group
+ * leader. New process uses system-default slack.
+ */
+ p->timer_slack_ns = (clone_flags & CLONE_THREAD) ?
+ current->group_leader->timer_slack_ns :
+ default_timer_slack_ns;
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 1614be2..a0d302d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1887,7 +1887,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
+ task_timer_slack(current));
}
retry:
@@ -2281,7 +2281,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
+ task_timer_slack(current));
}
/*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf5..0c56fec 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -51,6 +51,12 @@
#include <trace/events/timer.h>
/*
+ * Default hrtimer slack value, in nanoseconds. May be changed in
+ * [0..HRTIMER_MAX_SLACK] range through kernel.timer_slack sysctl.
+ */
+__read_mostly int default_timer_slack_ns = HRTIMER_DEFAULT_SLACK;
+
+/*
* The timer bases:
*
* There are more clockids then hrtimer bases. Thus, we index
@@ -1564,9 +1570,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
int ret = 0;
unsigned long slack;
- slack = current->timer_slack_ns;
- if (rt_task(current))
- slack = 0;
+ slack = task_timer_slack(current);
hrtimer_init_on_stack(&t.timer, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
diff --git a/kernel/sys.c b/kernel/sys.c
index 4070153..e976540 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -22,6 +22,7 @@
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
+#include <linux/hrtimer.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
@@ -1919,10 +1920,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
- current->default_timer_slack_ns;
- else
+ default_timer_slack_ns;
+ else if (arg2 <= HRTIMER_MAX_SLACK)
current->timer_slack_ns = arg2;
- error = 0;
+ else
+ error = -EINVAL;
break;
case PR_MCE_KILL:
if (arg4 | arg5)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f487f25..2cd42c6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,6 +136,7 @@ static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
+static const int slack_max = HRTIMER_MAX_SLACK;
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
@@ -1004,6 +1005,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+ {
+ .procname = "timer_slack",
+ .data = &default_timer_slack_ns,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &slack_max,
+ },
{ }
};
--
1.7.7.6
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-04-24 22:06 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-20 7:49 [RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks Dmitry Antipov
2012-04-05 0:10 ` Andrew Morton
2012-04-05 0:10 ` Andrew Morton
2012-04-06 9:14 ` Dmitry Antipov
2012-04-06 9:14 ` Dmitry Antipov
2012-04-24 22:06 ` Michael Kerrisk
2012-04-24 22:06 ` Michael Kerrisk
2012-02-28 8:46 Dmitry Antipov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.