All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Oleg Nesterov <oleg@redhat.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Frederic Weisbecker <frederic@kernel.org>,
	John Stultz <john.stultz@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: [patch V2 3/5] posix-cpu-timers: Provide mechanisms to defer timer handling to task_work
Date: Thu, 16 Jul 2020 22:19:26 +0200	[thread overview]
Message-ID: <20200716202044.734067877@linutronix.de> (raw)
In-Reply-To: 20200716201923.228696399@linutronix.de

Running posix cpu timers in hard interrupt context has a few downsides:

 - For PREEMPT_RT it cannot work as the expiry code needs to take
   sighand lock, which is a 'sleeping spinlock' in RT. The original RT
   approach of offloading the posix CPU timer handling into a high
   priority thread was clumsy and provided no real benefit in general.

 - For fine grained accounting it's just wrong to run this in the context
   of the timer interrupt because that way process specific CPU time is
   accounted to the timer interrupt.

 - Long running timer interrupts caused by a large number of expiring
   timers which can be created and armed by unprivileged user space.

There is no hard requirement to expire them in interrupt context.

Provide infrastructure to schedule task work which allows splitting the
posix CPU timer code into a quick check in interrupt context and a thread
context expiry and signal delivery function. This has to be enabled by
architectures as it requires that the architecture specific KVM
implementation handles pending task work before exiting to guest mode.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h   |   17 ++++++++++++++++
 kernel/time/Kconfig            |    5 ++++
 kernel/time/posix-cpu-timers.c |   42 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 63 insertions(+), 1 deletion(-)

--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -112,25 +112,42 @@ struct posix_cputimer_base {
 enum {
 	CPUTIMERS_ACTIVE,
 	CPUTIMERS_EXPIRING,
+	CPUTIMERS_WORK_SCHEDULED,
 };
 
 /**
  * posix_cputimers - Container for posix CPU timer related data
  * @bases:	Base container for posix CPU clocks
  * @flags:	Flags for various CPUTIMERS_* states
+ * @task_work:	Task work to defer timer expiry into task context
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
 	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
 	unsigned long			flags;
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+	struct callback_head		task_work;
+#endif
 };
 
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void posix_cpu_timers_work(struct callback_head *work);
+
+static inline void posix_cputimer_init_work(struct posix_cputimers *pct)
+{
+	pct->task_work.func = posix_cpu_timers_work;
+}
+#else
+static inline void posix_cputimer_init_work(struct posix_cputimers *pct) { }
+#endif
+
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
 	memset(pct, 0, sizeof(*pct));
 	pct->bases[0].nextevt = U64_MAX;
 	pct->bases[1].nextevt = U64_MAX;
 	pct->bases[2].nextevt = U64_MAX;
+	posix_cputimer_init_work(pct);
 }
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -52,6 +52,11 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST
 config GENERIC_CMOS_UPDATE
 	bool
 
+# Select to handle posix CPU timers from task_work
+# and not from the timer interrupt context
+config POSIX_CPU_TIMERS_TASK_WORK
+	bool
+
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"
 
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -14,6 +14,7 @@
 #include <linux/tick.h>
 #include <linux/workqueue.h>
 #include <linux/compat.h>
+#include <linux/task_work.h>
 #include <linux/sched/deadline.h>
 
 #include "posix-timers.h"
@@ -1075,7 +1076,9 @@ static inline bool fastpath_timer_check(
 	return false;
 }
 
-static void __run_posix_cpu_timers(struct task_struct *tsk)
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk);
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
 {
 	struct k_itimer *timer, *next;
 	unsigned long flags;
@@ -1096,6 +1099,12 @@ static void __run_posix_cpu_timers(struc
 	check_process_timers(tsk, &firing);
 
 	/*
+	 * Allow new work to be scheduled. The expiry cache
+	 * is up to date.
+	 */
+	posix_cpu_timers_enable_work(tsk);
+
+	/*
 	 * We must release these locks before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list.  We have set
@@ -1130,6 +1139,37 @@ static void __run_posix_cpu_timers(struc
 	lockdep_posixtimer_exit();
 }
 
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+
+void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
+static void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	struct posix_cputimers *pct = &tsk->posix_cputimers;
+
+	if (!test_and_set_bit(CPUTIMERS_WORK_SCHEDULED, &pct->flags))
+		task_work_add(tsk, &pct->task_work, true);
+}
+
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk)
+{
+	clear_bit(CPUTIMERS_WORK_SCHEDULED, &tsk->posix_cputimers.flags);
+}
+
+#else
+
+static void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	handle_posix_cpu_timers(tsk);
+}
+
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk) { }
+
+#endif
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.


  parent reply	other threads:[~2020-07-16 20:22 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-16 20:19 [patch V2 0/5] posix-cpu-timers: Move expiry into task work context Thomas Gleixner
2020-07-16 20:19 ` [patch V2 1/5] posix-cpu-timers: Split run_posix_cpu_timers() Thomas Gleixner
2020-07-16 20:19 ` [patch V2 2/5] posix-cpu-timers: Convert the flags to a bitmap Thomas Gleixner
2020-07-21 12:34   ` Frederic Weisbecker
2020-07-21 16:10     ` Thomas Gleixner
2020-07-21 16:23       ` David Laight
2020-07-21 18:30         ` Thomas Gleixner
2020-07-16 20:19 ` Thomas Gleixner [this message]
2020-07-16 22:50   ` [patch V2 3/5] posix-cpu-timers: Provide mechanisms to defer timer handling to task_work Peter Zijlstra
2020-07-17 18:37     ` Thomas Gleixner
2020-07-23  1:03     ` Frederic Weisbecker
2020-07-23  8:32       ` Thomas Gleixner
2020-07-23 12:15         ` Frederic Weisbecker
2020-07-16 22:54   ` Peter Zijlstra
2020-07-17 18:38     ` Thomas Gleixner
2020-07-19 19:33       ` Thomas Gleixner
2020-07-21 18:50         ` Thomas Gleixner
2020-07-17 17:26   ` Oleg Nesterov
2020-07-17 18:35     ` Thomas Gleixner
2020-07-16 20:19 ` [patch V2 4/5] posix-cpu-timers: Expiry timers directly when in task work context Thomas Gleixner
2020-07-16 20:19 ` [patch V2 5/5] x86: Select POSIX_CPU_TIMERS_TASK_WORK Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200716202044.734067877@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=ebiederm@xmission.com \
    --cc=frederic@kernel.org \
    --cc=john.stultz@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.