linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Aaron Lu <aaron.lu@intel.com>, Olivier Dion <odion@efficios.com>,
	michael.christie@oracle.com
Subject: [RFC PATCH] sched: Rate limit migrations
Date: Tue, 11 Apr 2023 17:41:16 -0400	[thread overview]
Message-ID: <20230411214116.361016-1-mathieu.desnoyers@efficios.com> (raw)

This WIP patch rate-limits migrations to 32 migrations per 10ms window
for each task.

The specific migration count and window size can be changed with the
following defines in kernel/sched/sched.h:

- SCHED_MIGRATION_WINDOW_NS
- SCHED_MIGRATION_LIMIT

Testing is welcome, especially to see if it helps with Aaron's
migration-heavy workload wrt rseq concurrency id performance
regression.

Link: https://lore.kernel.org/lkml/20230327080502.GA570847@ziqianlu-desk2/
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Olivier Dion <odion@efficios.com>
Cc: michael.christie@oracle.com
---
 include/linux/sched.h |  9 +++++++++
 kernel/fork.c         |  3 +++
 kernel/sched/core.c   | 42 ++++++++++++++++++++++++++++++++++++++----
 kernel/sched/sched.h  |  7 +++++++
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 48d48b2c73a5..bfd5e268900c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1316,6 +1316,15 @@ struct task_struct {
 	int				last_mm_cid;	/* Most recent cid in mm */
 	int				mm_cid_active;	/* Whether cid bitmap is active */
 #endif
+	/*
+	 * Keep track of last migration time to compare sched_clock
+	 * locally from a single CPU perspective.
+	 */
+	u64				last_migration_time;
+	/* Time slice used in current migration window. */
+	u64				migration_window_time_slice;
+	/* Number of migrations in current migration window. */
+	u32				migration_count;
 
 	struct tlbflush_unmap_batch	tlb_ubc;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 3832bea713c4..791792a218f6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,6 +1061,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->last_mm_cid = -1;
 	tsk->mm_cid_active = 0;
 #endif
+	tsk->last_migration_time = 0;
+	tsk->migration_window_time_slice = 0;
+	tsk->migration_count = 0;
 	return tsk;
 
 free_stack:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2aac6f14f21c..a530727b11f3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2087,6 +2087,7 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 	if (task_on_rq_migrating(p)) {
 		flags |= ENQUEUE_MIGRATED;
 		sched_mm_cid_migrate_to(rq, p);
+		p->last_migration_time = sched_clock();
 	}
 
 	enqueue_task(rq, p, flags);
@@ -3547,17 +3548,47 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	return dest_cpu;
 }
 
+static inline
+bool migration_allowed(struct task_struct *p, u64 current_time)
+{
+	u64 delta = current_time - p->last_migration_time;
+
+	if (delta + p->migration_window_time_slice > SCHED_MIGRATION_WINDOW_NS ||
+	    p->migration_count < SCHED_MIGRATION_LIMIT)
+		return true;
+	return false;
+}
+
+static inline
+void migration_add_delta_to_slice(struct task_struct *p, u64 current_time)
+{
+	u64 delta = current_time - p->last_migration_time;
+
+	if (delta + p->migration_window_time_slice > SCHED_MIGRATION_WINDOW_NS) {
+		/* Reset the migration window if it has ended. */
+		p->migration_window_time_slice = 0;
+		p->migration_count = 0;
+		return;
+	}
+	p->migration_window_time_slice += delta;
+	p->migration_count++;
+}
+
 /*
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int prev_cpu, int wake_flags)
 {
+	u64 current_time = sched_clock();
+	int cpu = prev_cpu;
+
 	lockdep_assert_held(&p->pi_lock);
 
-	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
-		cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
-	else
+	if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+		if (migration_allowed(p, current_time))
+			cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
+	} else
 		cpu = cpumask_any(p->cpus_ptr);
 
 	/*
@@ -3573,6 +3604,9 @@ int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
 	if (unlikely(!is_cpu_allowed(p, cpu)))
 		cpu = select_fallback_rq(task_cpu(p), p);
 
+	if (prev_cpu != cpu)
+		migration_add_delta_to_slice(p, current_time);
+
 	return cpu;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 64220134fb45..e52cc38f10fc 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -104,6 +104,13 @@ struct cpuidle_state;
 #define TASK_ON_RQ_QUEUED	1
 #define TASK_ON_RQ_MIGRATING	2
 
+/*
+ * A task can be migrated at most SCHED_MIGRATION_WINDOW_LIMIT times per
+ * sched-migration window.
+ */
+#define SCHED_MIGRATION_WINDOW_NS	(10ULL * 1000000)	/* 10 ms */
+#define SCHED_MIGRATION_LIMIT		32
+
 extern __read_mostly int scheduler_running;
 
 extern unsigned long calc_load_update;
-- 
2.25.1


             reply	other threads:[~2023-04-11 21:41 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-11 21:41 Mathieu Desnoyers [this message]
2023-04-12 11:53 ` [RFC PATCH] sched: Rate limit migrations Aaron Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230411214116.361016-1-mathieu.desnoyers@efficios.com \
    --to=mathieu.desnoyers@efficios.com \
    --cc=aaron.lu@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael.christie@oracle.com \
    --cc=odion@efficios.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).