From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Intel-gfx@lists.freedesktop.org
Cc: "Rob Clark" <robdclark@chromium.org>,
	Kenny.Ho@amd.com, "Daniel Vetter" <daniel.vetter@ffwll.ch>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	linux-kernel@vger.kernel.org,
	"Stéphane Marchesin" <marcheu@chromium.org>,
	"Christian König" <christian.koenig@amd.com>,
	"Zefan Li" <lizefan.x@bytedance.com>,
	"Dave Airlie" <airlied@redhat.com>, "Tejun Heo" <tj@kernel.org>,
	cgroups@vger.kernel.org, "T . J . Mercier" <tjmercier@google.com>
Subject: [Intel-gfx] [RFC 17/17] drm/i915: Implement cgroup controller over budget throttling
Date: Wed, 19 Oct 2022 18:32:54 +0100	[thread overview]
Message-ID: <20221019173254.3361334-18-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20221019173254.3361334-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When notified by the DRM core that we are over our allotted time budget,
the i915 instance will check whether any of the GPU engines it is
responsible for are fully saturated. If one is, and the client in
question is using that engine, the client will be throttled.
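
To make the decision flow easier to follow, here is a simplified,
self-contained sketch of the heuristic implemented by
i915_drm_cgroup_signal_budget() in this patch. This is illustrative
userspace-style C rather than the kernel code itself; the 95%
saturation threshold and the per engine-class throttle flags mirror
the patch, while the helper name, fixed class count and busyness
inputs are made up for the example.

        #include <stdbool.h>
        #include <stdint.h>

        #define NUM_CLASSES     5       /* illustrative engine class count */
        #define SATURATION_PCT  95      /* threshold used by this patch */

        struct client_state {
                bool throttle[NUM_CLASSES]; /* per engine-class throttle flag */
        };

        /*
         * class_busy_pct[i]: physical busyness of engine class i over the
         * last scanning period, in percent. client_busy_ns[i]: this client's
         * GPU time on class i over the same period. Returns true if any
         * class was newly throttled.
         */
        static bool update_throttling(struct client_state *c,
                                      const unsigned int class_busy_pct[NUM_CLASSES],
                                      const uint64_t client_busy_ns[NUM_CLASSES])
        {
                bool newly_throttled = false;
                unsigned int i;

                for (i = 0; i < NUM_CLASSES; i++) {
                        if (class_busy_pct[i] <= SATURATION_PCT) {
                                /* Class not oversubscribed - lift any throttle. */
                                c->throttle[i] = false;
                                continue;
                        }

                        /* Only throttle clients using the saturated class. */
                        if (client_busy_ns[i] && !c->throttle[i]) {
                                c->throttle[i] = true;
                                newly_throttled = true;
                        }
                }

                return newly_throttled;
        }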

For now, throttling is done simplistically by lowering the scheduling
priority while the client is throttled.
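
As a rough sketch of what that means at execbuffer time (cf.
copy_priority() below): while a class is marked throttled, a randomised
penalty of up to half the cgroup priority range is subtracted before
the value is scaled back and clamped into the i915 user priority range.
The *_MIN/*_MAX constants below are placeholders for the values defined
elsewhere in this series, the rounding details are elided, and this is
again illustrative userspace-style C only.

        #include <stdbool.h>
        #include <stdlib.h>

        #define CGROUP_PRIO_MIN         (-10000)        /* placeholder */
        #define CGROUP_PRIO_MAX         10000           /* placeholder */
        #define I915_USER_PRIO_MIN      (-1023)         /* placeholder */
        #define I915_USER_PRIO_MAX      1023            /* placeholder */

        static int clamp_int(int v, int lo, int hi)
        {
                return v < lo ? lo : (v > hi ? hi : v);
        }

        /* ctx_prio: context priority (i915 user range); cg_prio: cgroup bias. */
        static int effective_priority(int ctx_prio, int cg_prio, bool throttled)
        {
                const int scale = CGROUP_PRIO_MAX / I915_USER_PRIO_MAX;
                int prio = ctx_prio * scale + cg_prio;

                if (throttled)
                        /* Random penalty, up to half the range below zero. */
                        prio -= 1 + rand() % (-CGROUP_PRIO_MIN / 2);

                return clamp_int(prio / scale,
                                 I915_USER_PRIO_MIN, I915_USER_PRIO_MAX);
        }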

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 22 ++++-
 drivers/gpu/drm/i915/i915_driver.c            |  1 +
 drivers/gpu/drm/i915/i915_drm_client.c        | 93 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_drm_client.h        |  9 ++
 4 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 391c5b5c80be..efcbd827f6a0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -8,6 +8,7 @@
 #include <linux/dma-resv.h>
 #include <linux/highmem.h>
 #include <linux/minmax.h>
+#include <linux/prandom.h>
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
@@ -3018,15 +3019,32 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
 }
 
 #ifdef CONFIG_CGROUP_DRM
+static unsigned int
+__get_class(struct drm_i915_file_private *fpriv, const struct i915_request *rq)
+{
+	unsigned int class;
+
+	class = rq->context->engine->uabi_class;
+
+	if (WARN_ON_ONCE(class >= ARRAY_SIZE(fpriv->client->throttle)))
+		class = 0;
+
+	return class;
+}
+
 static void copy_priority(struct i915_sched_attr *attr,
-			  const struct i915_execbuffer *eb)
+			  const struct i915_execbuffer *eb,
+			  const struct i915_request *rq)
 {
+	struct drm_i915_file_private *file_priv = eb->file->driver_priv;
 	const int scale = DIV_ROUND_CLOSEST(DRM_CGROUP_PRIORITY_MAX,
 					    I915_CONTEXT_MAX_USER_PRIORITY);
 	int prio;
 
 	*attr = eb->gem_context->sched;
 	prio = attr->priority * scale + eb->file->drm_cgroup_priority;
+	if (file_priv->client->throttle[__get_class(file_priv, rq)])
+		prio -= 1 + prandom_u32_max(-DRM_CGROUP_PRIORITY_MIN / 2);
 	prio = DIV_ROUND_UP(prio, scale);
 	attr->priority = clamp(prio,
 			       I915_CONTEXT_MIN_USER_PRIORITY,
@@ -3056,7 +3074,7 @@ static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
 
 	/* Check that the context wasn't destroyed before submission */
 	if (likely(!intel_context_is_closed(eb->context))) {
-		copy_priority(&attr, eb);
+		copy_priority(&attr, eb, rq);
 	} else {
 		/* Serialise with context_close via the add_to_timeline */
 		i915_request_set_error_once(rq, -ENOENT);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index b949fd715202..abac9bb5bf27 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1897,6 +1897,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 static const struct drm_cgroup_ops i915_drm_cgroup_ops = {
 	.priority_levels = i915_drm_priority_levels,
 	.active_time_us = i915_drm_cgroup_get_active_time_us,
+	.signal_budget = i915_drm_cgroup_signal_budget,
 };
 #endif
 
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 8527fe80d449..ce497055cc3f 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
@@ -173,6 +174,98 @@ u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file)
 
 	return busy;
 }
+
+int i915_drm_cgroup_signal_budget(struct drm_file *file, u64 usage, u64 budget)
+{
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	u64 class_usage[I915_LAST_UABI_ENGINE_CLASS + 1];
+	u64 class_last[I915_LAST_UABI_ENGINE_CLASS + 1];
+	struct drm_i915_private *i915 = fpriv->dev_priv;
+	struct i915_drm_client *client = fpriv->client;
+	struct intel_engine_cs *engine;
+	bool over = usage > budget;
+	unsigned int i;
+	ktime_t unused;
+	int ret = 0;
+	u64 t;
+
+	if (!supports_stats(i915))
+		return -EINVAL;
+
+	if (usage == 0 && budget == 0)
+		return 0;
+
+printk("i915_drm_cgroup_signal_budget client-id=%u over=%u (%llu/%llu) <%u>\n",
+       client->id, over, usage, budget, client->over_budget);
+
+	if (over) {
+		client->over_budget++;
+		if (!client->over_budget)
+			client->over_budget = 2;
+	} else {
+		client->over_budget = 0;
+		memset(client->class_last, 0, sizeof(client->class_last));
+		memset(client->throttle, 0, sizeof(client->throttle));
+		return 0;
+	}
+
+	memset(class_usage, 0, sizeof(class_usage));
+	for_each_uabi_engine(engine, i915)
+		class_usage[engine->uabi_class] +=
+			ktime_to_ns(intel_engine_get_busy_time(engine, &unused));
+
+	memcpy(class_last, client->class_last, sizeof(class_last));
+	memcpy(client->class_last, class_usage, sizeof(class_last));
+
+	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
+		class_usage[i] -= class_last[i];
+
+	t = client->last;
+	client->last = ktime_get_raw_ns();
+	t = client->last - t;
+
+	if (client->over_budget == 1)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
+		u64 client_class_usage[I915_LAST_UABI_ENGINE_CLASS + 1];
+		unsigned int capacity;
+
+		if (!i915->engine_uabi_class_count[i])
+			continue;
+
+		t = DIV_ROUND_UP_ULL(t, 1000);
+		class_usage[i] = DIV_ROUND_CLOSEST_ULL(class_usage[i], 1000);
+		usage = DIV_ROUND_CLOSEST_ULL(class_usage[i] * 100ULL,
+					      t *
+					      i915->engine_uabi_class_count[i]);
+		if (usage <= 95) {
+			/* class not oversubscribed */
+			if (client->throttle[i]) {
+				client->throttle[i] = false;
+printk("  UN-throttling class%u (phys=%lld%%)\n",
+       i, usage);
+			}
+			continue;
+		}
+
+		client_class_usage[i] =
+			get_class_active_ns(client, i, &capacity);
+
+		if (client_class_usage[i] && !client->throttle[i]) {
+			ret |= 1;
+			client->throttle[i] = true;
+			/*
+			 * QQQ maybe apply "strength" of throttling based on
+			 * usage/budget?
+			 */
+printk("  THROTTLING class%u (phys=%lld%% client=%lluus)\n",
+       i, usage, client_class_usage[i] / 1000);
+		}
+	}
+
+	return ret;
+}
 #endif
 
 #ifdef CONFIG_PROC_FS
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 99b8ae01c183..b05afe01e68e 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -40,6 +40,13 @@ struct i915_drm_client {
 	 * @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
 	 */
 	atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
+
+#ifdef CONFIG_CGROUP_DRM
+	bool throttle[I915_LAST_UABI_ENGINE_CLASS + 1];
+	unsigned int over_budget;
+	u64 last;
+	u64 class_last[I915_LAST_UABI_ENGINE_CLASS + 1];
+#endif
 };
 
 void i915_drm_clients_init(struct i915_drm_clients *clients,
@@ -70,5 +77,7 @@ void i915_drm_clients_fini(struct i915_drm_clients *clients);
 unsigned int i915_drm_priority_levels(struct drm_file *file);
 
 u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file);
+int i915_drm_cgroup_signal_budget(struct drm_file *file,
+				  u64 usage, u64 budget);
 
 #endif /* !__I915_DRM_CLIENT_H__ */
-- 
2.34.1


Thread overview: 86+ messages (newest ~2022-10-19 17:34 UTC)
2022-10-19 17:32 [RFC 00/17] DRM scheduling cgroup controller Tvrtko Ursulin
2022-10-19 17:32 ` Tvrtko Ursulin
2022-10-19 17:32 ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 01/17] cgroup: Add the DRM " Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 02/17] drm: Track clients per owning process Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-20  6:40   ` Christian König
2022-10-20  6:40     ` Christian König
2022-10-20  6:40     ` [Intel-gfx] " Christian König
2022-10-20  7:34     ` Tvrtko Ursulin
2022-10-20  7:34       ` Tvrtko Ursulin
2022-10-20  7:34       ` [Intel-gfx] " Tvrtko Ursulin
2022-10-20 11:33       ` Christian König
2022-10-20 11:33         ` Christian König
2022-10-20 11:33         ` [Intel-gfx] " Christian König
2022-10-27 14:35         ` Tvrtko Ursulin
2022-10-27 14:35           ` Tvrtko Ursulin
2022-10-27 14:35           ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 03/17] cgroup/drm: Support cgroup priority control Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 04/17] drm/cgroup: Allow safe external access to file_priv Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 05/17] drm: Connect priority updates to drm core Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-20  9:50   ` kernel test robot
2022-10-19 17:32 ` [RFC 06/17] drm: Only track clients which are providing drm_cgroup_ops Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [Intel-gfx] [RFC 07/17] drm/i915: i915 priority Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 08/17] drm: Allow for migration of clients Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 09/17] cgroup/drm: Introduce weight based drm cgroup control Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 10/17] drm: Add ability to query drm cgroup GPU time Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 11/17] drm: Add over budget signalling callback Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 12/17] cgroup/drm: Client exit hook Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 13/17] cgroup/drm: Ability to periodically scan cgroups for over budget GPU usage Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-21 22:52   ` T.J. Mercier
2022-10-21 22:52     ` T.J. Mercier
2022-10-21 22:52     ` [Intel-gfx] " T.J. Mercier
2022-10-27 14:45     ` Tvrtko Ursulin
2022-10-27 14:45       ` Tvrtko Ursulin
2022-10-27 14:45       ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [Intel-gfx] [RFC 14/17] cgroup/drm: Show group budget signaling capability in sysfs Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32 ` [RFC 15/17] drm/i915: Migrate client to new owner on context create Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` [Intel-gfx] " Tvrtko Ursulin
2022-10-19 17:32 ` [Intel-gfx] [RFC 16/17] drm/i915: Wire up with drm controller GPU time query Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 17:32 ` Tvrtko Ursulin [this message]
2022-10-19 17:32   ` [RFC 17/17] drm/i915: Implement cgroup controller over budget throttling Tvrtko Ursulin
2022-10-19 17:32   ` Tvrtko Ursulin
2022-10-19 18:45 ` [RFC 00/17] DRM scheduling cgroup controller Tejun Heo
2022-10-19 18:45   ` Tejun Heo
2022-10-19 18:45   ` [Intel-gfx] " Tejun Heo
2022-10-27 14:32   ` Tvrtko Ursulin
2022-10-27 14:32     ` Tvrtko Ursulin
2022-10-27 14:32     ` [Intel-gfx] " Tvrtko Ursulin
2022-10-31 20:20     ` Tejun Heo
2022-10-31 20:20       ` Tejun Heo
2022-10-31 20:20       ` [Intel-gfx] " Tejun Heo
2022-11-09 16:59       ` Tvrtko Ursulin
2022-11-09 16:59         ` Tvrtko Ursulin
2022-11-09 16:59         ` Tvrtko Ursulin
2022-10-19 19:25 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
