All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Bristot de Oliveira <bristot@kernel.org>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Valentin Schneider <vschneid@redhat.com>,
	linux-kernel@vger.kernel.org,
	Luca Abeni <luca.abeni@santannapisa.it>,
	Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>,
	Thomas Gleixner <tglx@linutronix.de>,
	Joel Fernandes <joel@joelfernandes.org>,
	Vineeth Pillai <vineeth@bitbyteword.org>,
	Shuah Khan <skhan@linuxfoundation.org>,
	bristot@kernel.org, Phil Auld <pauld@redhat.com>
Subject: [PATCH v5 7/7] sched/fair: Fair server interface
Date: Sat,  4 Nov 2023 11:59:24 +0100	[thread overview]
Message-ID: <26adad2378c8b15533e4f6216c2863341e587f57.1699095159.git.bristot@kernel.org> (raw)
In-Reply-To: <cover.1699095159.git.bristot@kernel.org>

Add an interface for fair server setup on debugfs.

Each rq have three files under /sys/kernel/debug/sched/rq/CPU{ID}:

 - fair_server_runtime: set runtime in ns
 - fair_server_period: set period in ns
 - fair_server_defer: on/off for the defer mechanism

Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
 kernel/sched/deadline.c |  89 +++++++++++++++---
 kernel/sched/debug.c    | 202 ++++++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c     |   6 --
 kernel/sched/sched.h    |   2 +
 4 files changed, 279 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 69ee1fbd60e4..1092ca8892e0 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -321,19 +321,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 		__sub_running_bw(dl_se->dl_bw, dl_rq);
 }
 
-static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
 {
-	struct rq *rq;
-
-	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
-
-	if (task_on_rq_queued(p))
-		return;
+	if (dl_se->dl_non_contending) {
+		sub_running_bw(dl_se, &rq->dl);
+		dl_se->dl_non_contending = 0;
 
-	rq = task_rq(p);
-	if (p->dl.dl_non_contending) {
-		sub_running_bw(&p->dl, &rq->dl);
-		p->dl.dl_non_contending = 0;
 		/*
 		 * If the timer handler is currently running and the
 		 * timer cannot be canceled, inactive_task_timer()
@@ -341,13 +334,25 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
 		 * will not touch the rq's active utilization,
 		 * so we are still safe.
 		 */
-		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-			put_task_struct(p);
+		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
+			if (!dl_server(dl_se))
+				put_task_struct(dl_task_of(dl_se));
+		}
 	}
-	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	__sub_rq_bw(dl_se->dl_bw, &rq->dl);
 	__add_rq_bw(new_bw, &rq->dl);
 }
 
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
+
+	if (task_on_rq_queued(p))
+		return;
+
+	dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
+}
+
 static void __dl_clear_params(struct sched_dl_entity *dl_se);
 
 /*
@@ -1508,10 +1513,22 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 
 void dl_server_start(struct sched_dl_entity *dl_se)
 {
+	/*
+	 * XXX: the apply do not work fine at the init phase for the
+	 * fair server because things are not yet set. We need to improve
+	 * this before getting generic.
+	 */
 	if (!dl_server(dl_se)) {
+		u64 runtime = 50 * NSEC_PER_MSEC;
+		u64 period = 1000 * NSEC_PER_MSEC;
+
+		dl_server_apply_params(dl_se, runtime, period, 1);
+
+		dl_se->dl_zerolax = 1;
 		dl_se->dl_server = 1;
 		setup_new_dl_entity(dl_se);
 	}
+
 	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
 }
 
@@ -1532,6 +1549,50 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 	dl_se->server_pick = pick;
 }
 
+int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
+{
+	u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	u64 new_bw = to_ratio(period, runtime);
+	struct rq *rq = dl_se->rq;
+	int cpu = cpu_of(rq);
+	struct dl_bw *dl_b;
+	unsigned long cap;
+	int retval = 0;
+	int cpus;
+
+	dl_b = dl_bw_of(cpu);
+	raw_spin_lock(&dl_b->lock);
+	cpus = dl_bw_cpus(cpu);
+	cap = dl_bw_capacity(cpu);
+
+	if (__dl_overflow(dl_b, cap, old_bw, new_bw)) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	if (init) {
+		__add_rq_bw(new_bw, &rq->dl);
+		__dl_add(dl_b, new_bw, cpus);
+	} else {
+		__dl_sub(dl_b, dl_se->dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+
+		dl_rq_change_utilization(rq, dl_se, new_bw);
+	}
+
+	rq->fair_server.dl_runtime = runtime;
+	rq->fair_server.dl_deadline  = period;
+	rq->fair_server.dl_period  = period;
+
+	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+
+out:
+	raw_spin_unlock(&dl_b->lock);
+
+	return retval;
+}
+
 /*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4580a450700e..bd7ad6b8d3de 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -333,8 +333,208 @@ static const struct file_operations sched_debug_fops = {
 	.release	= seq_release,
 };
 
+enum dl_param {
+	DL_RUNTIME = 0,
+	DL_PERIOD,
+	DL_ZEROLAX
+};
+
+static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC; /* ~4 seconds */
+static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;     /* 100 us */
+
+static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
+				       size_t cnt, loff_t *ppos, enum dl_param param)
+{
+	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
+	u64 runtime, period, zerolax;
+	struct rq *rq = cpu_rq(cpu);
+	size_t err;
+	int retval;
+	u64 value;
+
+	err = kstrtoull_from_user(ubuf, cnt, 10, &value);
+	if (err)
+		return err;
+
+	scoped_guard (rq_lock_irqsave, rq) {
+
+		runtime  = rq->fair_server.dl_runtime;
+		period = rq->fair_server.dl_period;
+		zerolax = rq->fair_server.dl_zerolax;
+
+		switch (param) {
+		case DL_RUNTIME:
+			if (runtime == value)
+				goto out;
+			runtime = value;
+			break;
+		case DL_PERIOD:
+			if (value == period)
+				goto out;
+			period = value;
+			break;
+		case DL_ZEROLAX:
+			if (zerolax == value)
+				goto out;
+			zerolax = value;
+			break;
+		}
+
+		if (runtime > period
+				|| period > fair_server_period_max
+				|| period < fair_server_period_min
+				|| zerolax > 1) {
+			cnt = -EINVAL;
+			goto out;
+		}
+
+		if (rq->cfs.h_nr_running) {
+			update_rq_clock(rq);
+			dl_server_stop(&rq->fair_server);
+		}
+
+		/*
+		 * The zerolax does not change utilization, so just
+		 * setting it is enough.
+		 */
+		if (rq->fair_server.dl_zerolax != zerolax) {
+			rq->fair_server.dl_zerolax = zerolax;
+		} else {
+			retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
+			if (retval)
+				cnt = retval;
+		}
+
+		if (rq->cfs.h_nr_running)
+			dl_server_start(&rq->fair_server);
+	}
+
+out:
+	*ppos += cnt;
+	return cnt;
+}
+
+static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
+{
+	unsigned long cpu = (unsigned long) m->private;
+	struct rq *rq = cpu_rq(cpu);
+	u64 value;
+
+	switch (param) {
+	case DL_RUNTIME:
+		value = rq->fair_server.dl_runtime;
+		break;
+	case DL_PERIOD:
+		value = rq->fair_server.dl_period;
+		break;
+	case DL_ZEROLAX:
+		value = rq->fair_server.dl_zerolax;
+	}
+
+	seq_printf(m, "%llu\n", value);
+	return 0;
+
+}
+
+static ssize_t
+sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
+{
+	return sched_fair_server_show(m, v, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_runtime_fops = {
+	.open		= sched_fair_server_runtime_open,
+	.write		= sched_fair_server_runtime_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static ssize_t
+sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
+}
+
+static int sched_fair_server_period_show(struct seq_file *m, void *v)
+{
+	return sched_fair_server_show(m, v, DL_PERIOD);
+}
+
+static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_fair_server_period_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_period_fops = {
+	.open		= sched_fair_server_period_open,
+	.write		= sched_fair_server_period_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static ssize_t
+sched_fair_server_defer_write(struct file *filp, const char __user *ubuf,
+			      size_t cnt, loff_t *ppos)
+{
+	return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_ZEROLAX);
+}
+
+static int sched_fair_server_defer_show(struct seq_file *m, void *v)
+{
+	return sched_fair_server_show(m, v, DL_ZEROLAX);
+}
+
+static int sched_fair_server_defer_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_fair_server_defer_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_defer_fops = {
+	.open		= sched_fair_server_defer_open,
+	.write		= sched_fair_server_defer_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static struct dentry *debugfs_sched;
 
+static void debugfs_fair_server_init(void)
+{
+	long cpu;
+	struct dentry *rq_dentry;
+
+	rq_dentry = debugfs_create_dir("rq", debugfs_sched);
+	if (!rq_dentry)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct dentry *d_cpu;
+		char buf[32];
+
+		snprintf(buf, sizeof(buf), "cpu%ld", cpu);
+		d_cpu = debugfs_create_dir(buf, rq_dentry);
+
+		debugfs_create_file("fair_server_runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
+		debugfs_create_file("fair_server_period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
+		debugfs_create_file("fair_server_defer", 0644, d_cpu, (void *) cpu, &fair_server_defer_fops);
+	}
+}
+
 static __init int sched_init_debug(void)
 {
 	struct dentry __maybe_unused *numa;
@@ -374,6 +574,8 @@ static __init int sched_init_debug(void)
 
 	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
 
+	debugfs_fair_server_init();
+
 	return 0;
 }
 late_initcall(sched_init_debug);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 399237cd9f59..5434c52f470d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8419,12 +8419,6 @@ void fair_server_init(struct rq *rq)
 	struct sched_dl_entity *dl_se = &rq->fair_server;
 
 	init_dl_entity(dl_se);
-
-	dl_se->dl_runtime = 50 * NSEC_PER_MSEC;
-	dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
-	dl_se->dl_period = 1000 * NSEC_PER_MSEC;
-	dl_se->dl_zerolax = 1;
-
 	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec0e288c8e06..312b31df5860 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -341,6 +341,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_pick_f pick);
 
 extern void fair_server_init(struct rq *);
+extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+		    u64 runtime, u64 period, bool init);
 
 #ifdef CONFIG_CGROUP_SCHED
 
-- 
2.40.1


  parent reply	other threads:[~2023-11-04 11:00 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-04 10:59 [PATCH v5 0/7] SCHED_DEADLINE server infrastructure Daniel Bristot de Oliveira
2023-11-04 10:59 ` [PATCH v5 1/7] sched: Unify runtime accounting across classes Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 2/7] sched/deadline: Collect sched_dl_entity initialization Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 3/7] sched/deadline: Move bandwidth accounting into {en,de}queue_dl_entity Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 4/7] sched/deadline: Introduce deadline servers Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 5/7] sched/fair: Add trivial fair server Daniel Bristot de Oliveira
2023-11-06 14:24   ` Peter Zijlstra
2023-11-06 14:26     ` Daniel Bristot de Oliveira
2023-11-04 10:59 ` [PATCH v5 6/7] sched/deadline: Deferrable dl server Daniel Bristot de Oliveira
2023-11-06 14:55   ` Peter Zijlstra
2023-11-06 17:05     ` Daniel Bristot de Oliveira
2023-11-06 19:32   ` Joel Fernandes
2023-11-06 21:32     ` Joel Fernandes
2023-11-06 21:37       ` Joel Fernandes
2023-11-07 11:58         ` Daniel Bristot de Oliveira
2023-11-08  2:42           ` Joel Fernandes
2023-11-07 16:47         ` Steven Rostedt
2023-11-07 17:35           ` Steven Rostedt
2023-11-07 17:46             ` Steven Rostedt
2023-11-07 17:54             ` Steven Rostedt
2023-11-07 19:32               ` Steven Rostedt
2023-11-07 20:07                 ` Steven Rostedt
2023-11-07 17:37           ` Daniel Bristot de Oliveira
2023-11-07 18:50             ` Daniel Bristot de Oliveira
2023-11-08  3:20               ` Joel Fernandes
2023-11-08  8:01                 ` Daniel Bristot de Oliveira
2023-11-08 18:25                   ` Joel Fernandes
2023-11-08 12:44               ` Peter Zijlstra
2023-11-08 12:50                 ` Peter Zijlstra
2023-11-08 14:52                   ` Daniel Bristot de Oliveira
2023-11-08 13:46                 ` Daniel Bristot de Oliveira
2023-11-08 13:58                 ` Daniel Bristot de Oliveira
2023-11-08 15:14                 ` Juri Lelli
2023-11-08 16:57                   ` Peter Zijlstra
2023-11-08  2:37           ` Joel Fernandes
2023-11-07  7:30     ` Daniel Bristot de Oliveira
2023-11-07 16:37   ` Steven Rostedt
2023-11-13 15:05   ` kernel test robot
2024-03-20  0:03   ` Joel Fernandes
2024-03-20 19:24     ` Daniel Bristot de Oliveira
2024-03-21 16:15       ` Joel Fernandes
2024-03-23 14:37         ` Joel Fernandes
2024-04-05 14:35         ` Daniel Bristot de Oliveira
2024-04-08 17:11           ` Steven Rostedt
2023-11-04 10:59 ` Daniel Bristot de Oliveira [this message]
2023-11-04 15:18   ` [PATCH v5 7/7] sched/fair: Fair server interface kernel test robot
2023-11-05  0:55   ` kernel test robot
2023-11-06 15:40   ` Peter Zijlstra
2023-11-06 16:29     ` Daniel Bristot de Oliveira
2023-11-07  8:16       ` Peter Zijlstra
2023-11-07 14:06         ` Daniel Bristot de Oliveira
2023-11-07 14:44       ` Peter Zijlstra
2023-11-07 12:38   ` Peter Zijlstra
2023-11-07 13:24     ` Daniel Bristot de Oliveira
2024-01-19  1:49   ` Joel Fernandes
2024-01-19  1:55   ` Joel Fernandes
2024-01-22 14:14     ` Daniel Bristot de Oliveira
2024-01-23 15:39       ` Joel Fernandes
2024-01-23 15:44       ` Joel Fernandes
2024-02-13  2:13   ` Joel Fernandes
2024-02-13  2:21     ` Joel Fernandes
2024-02-14 14:23     ` Daniel Bristot de Oliveira
2024-02-15 13:57       ` Joel Fernandes
2024-02-15 17:27         ` Daniel Bristot de Oliveira
2024-02-15 17:41           ` Joel Fernandes
2024-04-04 17:43             ` Daniel Bristot de Oliveira
2023-12-08 21:47 ` [PATCH v5 0/7] SCHED_DEADLINE server infrastructure Joel Fernandes
2024-02-19  7:33 ` Huang, Ying
2024-02-19 10:23   ` Daniel Bristot de Oliveira
2024-02-20  3:28     ` Huang, Ying
2024-02-20  8:31       ` Daniel Bristot de Oliveira
2024-02-20  8:41         ` Huang, Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=26adad2378c8b15533e4f6216c2863341e587f57.1699095159.git.bristot@kernel.org \
    --to=bristot@kernel.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=joel@joelfernandes.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pauld@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=skhan@linuxfoundation.org \
    --cc=tglx@linutronix.de \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=vincent.guittot@linaro.org \
    --cc=vineeth@bitbyteword.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.