All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Fix context ban and hang accounting for client
@ 2018-06-15 10:18 Mika Kuoppala
  2018-06-15 10:28 ` Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Mika Kuoppala @ 2018-06-15 10:18 UTC (permalink / raw)
  To: intel-gfx

If client is smart or lucky enough to create a new context
after each hang, our context banning mechanism will never
catch up, and as a result of that it will be saved from
client banning. This can result in a never ending streak of
gpu hangs caused by bad or malicious client, preventing
access from other legit gpu clients.

Fix this by always incrementing per client ban score if
it hangs in short successions regardless of context ban
scoring. The exception are non bannable contexts. They remain
detached from client ban scoring mechanism.

v2: xchg timestamp, tidyup (Chris)

Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 20 ++++++---
 drivers/gpu/drm/i915/i915_gem.c         | 57 +++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 3 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74dd88d8563e..93aa8e7dfaba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -352,14 +352,20 @@ struct drm_i915_file_private {
 
 	unsigned int bsd_engine;
 
-/* Client can have a maximum of 3 contexts banned before
- * it is denied of creating new contexts. As one context
- * ban needs 4 consecutive hangs, and more if there is
- * progress in between, this is a last resort stop gap measure
- * to limit the badly behaving clients access to gpu.
+/* Every context ban increments per client ban score. Also
+ * hangs in short succession increments ban score. If client suffers 3
+ * context bans, 9 hangs in quick succession or combination of those,
+ * it is banned and submitting more work will fail. This is a stop gap
+ * measure to limit the badly behaving clients access to gpu.
+ * Note that unbannable contexts never increment the client ban score.
  */
-#define I915_MAX_CLIENT_CONTEXT_BANS 3
-	atomic_t context_bans;
+#define I915_CLIENT_SCORE_HANG_FAST	1
+#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
+#define I915_CLIENT_SCORE_CONTEXT_BAN   3
+#define I915_CLIENT_SCORE_BANNED	9
+	/** ban_score: Accumulated score of all ctx bans and fast hangs. */
+	atomic_t ban_score;
+	unsigned long hang_timestamp;
 };
 
 /* Interface history:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8dd4d35655af..f06fe1c636e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2941,32 +2941,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
+					const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
 {
-	bool banned;
+	unsigned int score;
+	bool banned, bannable;
 
 	atomic_inc(&ctx->guilty_count);
 
-	banned = false;
-	if (i915_gem_context_is_bannable(ctx)) {
-		unsigned int score;
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
 
-		score = atomic_add_return(CONTEXT_SCORE_GUILTY,
-					  &ctx->ban_score);
-		banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+	DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s:%s\n",
+			 ctx->name, atomic_read(&ctx->guilty_count),
+			 score, yesno(bannable), yesno(banned));
 
-		DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
-				 ctx->name, score, yesno(banned));
-	}
-	if (!banned)
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
 		return;
 
-	i915_gem_context_set_banned(ctx);
-	if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-		atomic_inc(&ctx->file_priv->context_bans);
-		DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
-				 ctx->name, atomic_read(&ctx->file_priv->context_bans));
-	}
+	if (banned)
+		i915_gem_context_set_banned(ctx);
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
 }
 
 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
@@ -5820,6 +5842,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
 	file_priv->bsd_engine = -1;
+	file_priv->hang_timestamp = jiffies;
 
 	ret = i915_gem_context_open(i915, file);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ef6ea4bcd773..ccf463ab6562 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -708,7 +708,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
-	return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix context ban and hang accounting for client
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
@ 2018-06-15 10:28 ` Chris Wilson
  2018-06-15 13:41   ` Mika Kuoppala
  2018-06-15 10:44 ` Mika Kuoppala
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2018-06-15 10:28 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-06-15 11:18:28)
> If client is smart or lucky enough to create a new context
> after each hang, our context banning mechanism will never
> catch up, and as a result of that it will be saved from
> client banning. This can result in a never ending streak of
> gpu hangs caused by bad or malicious client, preventing
> access from other legit gpu clients.
> 
> Fix this by always incrementing per client ban score if
> it hangs in short successions regardless of context ban
> scoring. The exception are non bannable contexts. They remain
> detached from client ban scoring mechanism.
> 
> v2: xchg timestamp, tidyup (Chris)
> 
> Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit")
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 20 ++++++---
>  drivers/gpu/drm/i915/i915_gem.c         | 57 +++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>  3 files changed, 54 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 74dd88d8563e..93aa8e7dfaba 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -352,14 +352,20 @@ struct drm_i915_file_private {
>  
>         unsigned int bsd_engine;
>  
> -/* Client can have a maximum of 3 contexts banned before
> - * it is denied of creating new contexts. As one context
> - * ban needs 4 consecutive hangs, and more if there is
> - * progress in between, this is a last resort stop gap measure
> - * to limit the badly behaving clients access to gpu.
> +/* Every context ban increments per client ban score. Also

/*
 * Every

One day we'll have rewritten every line of code, and every comment.

> + * hangs in short succession increments ban score. If client suffers 3
> + * context bans, 9 hangs in quick succession or combination of those,

Leave out the numbers if possible, just explain the rationale of the
multilevel system.

> + * it is banned and submitting more work will fail. This is a stop gap
> + * measure to limit the badly behaving clients access to gpu.
> + * Note that unbannable contexts never increment the client ban score.
>   */
> -#define I915_MAX_CLIENT_CONTEXT_BANS 3
> -       atomic_t context_bans;
> +#define I915_CLIENT_SCORE_HANG_FAST    1
> +#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
> +#define I915_CLIENT_SCORE_CONTEXT_BAN   3
> +#define I915_CLIENT_SCORE_BANNED       9
> +       /** ban_score: Accumulated score of all ctx bans and fast hangs. */
> +       atomic_t ban_score;
> +       unsigned long hang_timestamp;
>  };
>  
>  /* Interface history:
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8dd4d35655af..f06fe1c636e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2941,32 +2941,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
>         return 0;
>  }
>  
> +static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
> +                                       const struct i915_gem_context *ctx)
> +{
> +       unsigned int score;
> +       unsigned long prev_hang;
> +
> +       if (i915_gem_context_is_banned(ctx))
> +               score = I915_CLIENT_SCORE_CONTEXT_BAN;
> +       else
> +               score = 0;
> +
> +       prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
> +       if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
> +               score += I915_CLIENT_SCORE_HANG_FAST;
> +
> +       if (score) {
> +               atomic_add(score, &file_priv->ban_score);
> +
> +               DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
> +                                ctx->name, score,
> +                                atomic_read(&file_priv->ban_score));
> +       }
> +}
> +
>  static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
>  {
> -       bool banned;
> +       unsigned int score;
> +       bool banned, bannable;
>  
>         atomic_inc(&ctx->guilty_count);
>  
> -       banned = false;
> -       if (i915_gem_context_is_bannable(ctx)) {
> -               unsigned int score;
> +       bannable = i915_gem_context_is_bannable(ctx);
> +       score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
> +       banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
>  
> -               score = atomic_add_return(CONTEXT_SCORE_GUILTY,
> -                                         &ctx->ban_score);
> -               banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
> +       DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s:%s\n",
> +                        ctx->name, atomic_read(&ctx->guilty_count),
> +                        score, yesno(bannable), yesno(banned));

ban: no:yes

Wut? Maybe just yesno(banned && bannable) as even debug messages
shouldn't strive to confuse us further.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Fix context ban and hang accounting for client
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
  2018-06-15 10:28 ` Chris Wilson
@ 2018-06-15 10:44 ` Mika Kuoppala
  2018-06-15 10:44 ` ✗ Fi.CI.SPARSE: warning for " Patchwork
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Mika Kuoppala @ 2018-06-15 10:44 UTC (permalink / raw)
  To: intel-gfx

If client is smart or lucky enough to create a new context
after each hang, our context banning mechanism will never
catch up, and as a result of that it will be saved from
client banning. This can result in a never ending streak of
gpu hangs caused by bad or malicious client, preventing
access from other legit gpu clients.

Fix this by always incrementing per client ban score if
it hangs in short successions regardless of context ban
scoring. The exception are non bannable contexts. They remain
detached from client ban scoring mechanism.

v2: xchg timestamp, tidyup (Chris)
v3: comment, bannable & banned together (Chris)

Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         | 21 ++++++---
 drivers/gpu/drm/i915/i915_gem.c         | 57 +++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74dd88d8563e..082e84ff0881 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -352,14 +352,21 @@ struct drm_i915_file_private {
 
 	unsigned int bsd_engine;
 
-/* Client can have a maximum of 3 contexts banned before
- * it is denied of creating new contexts. As one context
- * ban needs 4 consecutive hangs, and more if there is
- * progress in between, this is a last resort stop gap measure
- * to limit the badly behaving clients access to gpu.
+/*
+ * Every context ban increments per client ban score. Also
+ * hangs in short succession increments ban score. If ban threshold
+ * is reached, client is considered banned and submitting more work
+ * will fail. This is a stop gap measure to limit the badly behaving
+ * clients access to gpu. Note that unbannable contexts never increment
+ * the client ban score.
  */
-#define I915_MAX_CLIENT_CONTEXT_BANS 3
-	atomic_t context_bans;
+#define I915_CLIENT_SCORE_HANG_FAST	1
+#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
+#define I915_CLIENT_SCORE_CONTEXT_BAN   3
+#define I915_CLIENT_SCORE_BANNED	9
+	/** ban_score: Accumulated score of all ctx bans and fast hangs. */
+	atomic_t ban_score;
+	unsigned long hang_timestamp;
 };
 
 /* Interface history:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8dd4d35655af..977982a987c8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2941,32 +2941,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
+					const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
 {
-	bool banned;
+	unsigned int score;
+	bool banned, bannable;
 
 	atomic_inc(&ctx->guilty_count);
 
-	banned = false;
-	if (i915_gem_context_is_bannable(ctx)) {
-		unsigned int score;
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
 
-		score = atomic_add_return(CONTEXT_SCORE_GUILTY,
-					  &ctx->ban_score);
-		banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+	DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n",
+			 ctx->name, atomic_read(&ctx->guilty_count),
+			 score, yesno(banned && bannable));
 
-		DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
-				 ctx->name, score, yesno(banned));
-	}
-	if (!banned)
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
 		return;
 
-	i915_gem_context_set_banned(ctx);
-	if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-		atomic_inc(&ctx->file_priv->context_bans);
-		DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
-				 ctx->name, atomic_read(&ctx->file_priv->context_bans));
-	}
+	if (banned)
+		i915_gem_context_set_banned(ctx);
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
 }
 
 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
@@ -5820,6 +5842,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
 	file_priv->bsd_engine = -1;
+	file_priv->hang_timestamp = jiffies;
 
 	ret = i915_gem_context_open(i915, file);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ef6ea4bcd773..ccf463ab6562 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -708,7 +708,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
-	return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915: Fix context ban and hang accounting for client
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
  2018-06-15 10:28 ` Chris Wilson
  2018-06-15 10:44 ` Mika Kuoppala
@ 2018-06-15 10:44 ` Patchwork
  2018-06-15 11:03 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-06-15 10:44 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fix context ban and hang accounting for client
URL   : https://patchwork.freedesktop.org/series/44820/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Fix context ban and hang accounting for client
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3683:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3689:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Fix context ban and hang accounting for client
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
                   ` (2 preceding siblings ...)
  2018-06-15 10:44 ` ✗ Fi.CI.SPARSE: warning for " Patchwork
@ 2018-06-15 11:03 ` Patchwork
  2018-06-15 11:47 ` ✗ Fi.CI.SPARSE: warning for drm/i915: Fix context ban and hang accounting for client (rev2) Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-06-15 11:03 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fix context ban and hang accounting for client
URL   : https://patchwork.freedesktop.org/series/44820/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4323 -> Patchwork_9323 =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9323 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9323, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/44820/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9323:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_gttfill@basic:
      fi-pnv-d510:        PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_9323 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_gttfill@basic:
      fi-byt-n2820:       PASS -> FAIL (fdo#106744)

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106000)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106097)

    
    ==== Possible fixes ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-j4005:       FAIL (fdo#100368) -> PASS

    igt@kms_flip@basic-plain-flip:
      fi-glk-j4005:       DMESG-WARN (fdo#106097) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106097 https://bugs.freedesktop.org/show_bug.cgi?id=106097
  fdo#106744 https://bugs.freedesktop.org/show_bug.cgi?id=106744


== Participating hosts (43 -> 38) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4323 -> Patchwork_9323

  CI_DRM_4323: 25d3805133071406ffae77c994f464dbbb3bb34e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4519: 3381a56be31defb3b5c23a4fbc19ac26a000c35b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9323: e9aa4424223e3105de81fe8274087163cc809619 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e9aa4424223e drm/i915: Fix context ban and hang accounting for client

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9323/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915: Fix context ban and hang accounting for client (rev2)
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
                   ` (3 preceding siblings ...)
  2018-06-15 11:03 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-15 11:47 ` Patchwork
  2018-06-15 12:06 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-06-15 18:20 ` ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-06-15 11:47 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fix context ban and hang accounting for client (rev2)
URL   : https://patchwork.freedesktop.org/series/44820/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Fix context ban and hang accounting for client
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3683:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3690:16: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Fix context ban and hang accounting for client (rev2)
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
                   ` (4 preceding siblings ...)
  2018-06-15 11:47 ` ✗ Fi.CI.SPARSE: warning for drm/i915: Fix context ban and hang accounting for client (rev2) Patchwork
@ 2018-06-15 12:06 ` Patchwork
  2018-06-15 18:20 ` ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-06-15 12:06 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fix context ban and hang accounting for client (rev2)
URL   : https://patchwork.freedesktop.org/series/44820/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4323 -> Patchwork_9325 =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9325 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9325, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/44820/revisions/2/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9325:

  === IGT changes ===

    ==== Warnings ====

    igt@kms_pipe_crc_basic@read-crc-pipe-c-frame-sequence:
      fi-glk-j4005:       PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_9325 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_gttfill@basic:
      fi-byt-n2820:       PASS -> FAIL (fdo#106744)

    igt@kms_addfb_basic@invalid-set-prop-any:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106745)

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106097, fdo#106000) +1

    
    ==== Possible fixes ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-j4005:       FAIL (fdo#100368) -> PASS

    igt@kms_flip@basic-plain-flip:
      fi-glk-j4005:       DMESG-WARN (fdo#106097) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106097 https://bugs.freedesktop.org/show_bug.cgi?id=106097
  fdo#106744 https://bugs.freedesktop.org/show_bug.cgi?id=106744
  fdo#106745 https://bugs.freedesktop.org/show_bug.cgi?id=106745


== Participating hosts (43 -> 38) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4323 -> Patchwork_9325

  CI_DRM_4323: 25d3805133071406ffae77c994f464dbbb3bb34e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4519: 3381a56be31defb3b5c23a4fbc19ac26a000c35b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9325: 1cdd28f59475c5776076afbbfeca9c72c7f94328 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

1cdd28f59475 drm/i915: Fix context ban and hang accounting for client

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9325/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix context ban and hang accounting for client
  2018-06-15 10:28 ` Chris Wilson
@ 2018-06-15 13:41   ` Mika Kuoppala
  0 siblings, 0 replies; 9+ messages in thread
From: Mika Kuoppala @ 2018-06-15 13:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-06-15 11:18:28)
>> If client is smart or lucky enough to create a new context
>> after each hang, our context banning mechanism will never
>> catch up, and as a result of that it will be saved from
>> client banning. This can result in a never ending streak of
>> gpu hangs caused by bad or malicious client, preventing
>> access from other legit gpu clients.
>> 
>> Fix this by always incrementing per client ban score if
>> it hangs in short successions regardless of context ban
>> scoring. The exception are non bannable contexts. They remain
>> detached from client ban scoring mechanism.
>> 
>> v2: xchg timestamp, tidyup (Chris)
>> 
>> Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit")
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h         | 20 ++++++---
>>  drivers/gpu/drm/i915/i915_gem.c         | 57 +++++++++++++++++--------
>>  drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>>  3 files changed, 54 insertions(+), 25 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 74dd88d8563e..93aa8e7dfaba 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -352,14 +352,20 @@ struct drm_i915_file_private {
>>  
>>         unsigned int bsd_engine;
>>  
>> -/* Client can have a maximum of 3 contexts banned before
>> - * it is denied of creating new contexts. As one context
>> - * ban needs 4 consecutive hangs, and more if there is
>> - * progress in between, this is a last resort stop gap measure
>> - * to limit the badly behaving clients access to gpu.
>> +/* Every context ban increments per client ban score. Also
>
> /*
>  * Every
>
> One day we'll have rewritten every line of code, and every comment.
>
>> + * hangs in short succession increments ban score. If client suffers 3
>> + * context bans, 9 hangs in quick succession or combination of those,
>
> Leave out the numbers if possible, just explain the rationale of the
> multilevel system.
>
>> + * it is banned and submitting more work will fail. This is a stop gap
>> + * measure to limit the badly behaving clients access to gpu.
>> + * Note that unbannable contexts never increment the client ban score.
>>   */
>> -#define I915_MAX_CLIENT_CONTEXT_BANS 3
>> -       atomic_t context_bans;
>> +#define I915_CLIENT_SCORE_HANG_FAST    1
>> +#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
>> +#define I915_CLIENT_SCORE_CONTEXT_BAN   3
>> +#define I915_CLIENT_SCORE_BANNED       9
>> +       /** ban_score: Accumulated score of all ctx bans and fast hangs. */
>> +       atomic_t ban_score;
>> +       unsigned long hang_timestamp;
>>  };
>>  
>>  /* Interface history:
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 8dd4d35655af..f06fe1c636e5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2941,32 +2941,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
>>         return 0;
>>  }
>>  
>> +static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
>> +                                       const struct i915_gem_context *ctx)
>> +{
>> +       unsigned int score;
>> +       unsigned long prev_hang;
>> +
>> +       if (i915_gem_context_is_banned(ctx))
>> +               score = I915_CLIENT_SCORE_CONTEXT_BAN;
>> +       else
>> +               score = 0;
>> +
>> +       prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
>> +       if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
>> +               score += I915_CLIENT_SCORE_HANG_FAST;
>> +
>> +       if (score) {
>> +               atomic_add(score, &file_priv->ban_score);
>> +
>> +               DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
>> +                                ctx->name, score,
>> +                                atomic_read(&file_priv->ban_score));
>> +       }
>> +}
>> +
>>  static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
>>  {
>> -       bool banned;
>> +       unsigned int score;
>> +       bool banned, bannable;
>>  
>>         atomic_inc(&ctx->guilty_count);
>>  
>> -       banned = false;
>> -       if (i915_gem_context_is_bannable(ctx)) {
>> -               unsigned int score;
>> +       bannable = i915_gem_context_is_bannable(ctx);
>> +       score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
>> +       banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
>>  
>> -               score = atomic_add_return(CONTEXT_SCORE_GUILTY,
>> -                                         &ctx->ban_score);
>> -               banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
>> +       DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s:%s\n",
>> +                        ctx->name, atomic_read(&ctx->guilty_count),
>> +                        score, yesno(bannable), yesno(banned));
>
> ban: no:yes
>
> Wut? Maybe just yesno(banned && bannable) as even debug messages
> shouldn't strive to confuse us further.
>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Fixed the above and pushed. Thanks for review!
-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915: Fix context ban and hang accounting for client (rev2)
  2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
                   ` (5 preceding siblings ...)
  2018-06-15 12:06 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-15 18:20 ` Patchwork
  6 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-06-15 18:20 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fix context ban and hang accounting for client (rev2)
URL   : https://patchwork.freedesktop.org/series/44820/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4323_full -> Patchwork_9325_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9325_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9325_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9325_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@drv_selftest@live_hangcheck:
      shard-kbl:          PASS -> DMESG-FAIL

    igt@gem_eio@reset-stress:
      shard-glk:          PASS -> FAIL
      shard-hsw:          PASS -> FAIL
      shard-kbl:          PASS -> FAIL
      shard-snb:          PASS -> FAIL
      shard-apl:          PASS -> FAIL

    
    ==== Warnings ====

    igt@gem_exec_schedule@deep-bsd1:
      shard-kbl:          PASS -> SKIP +1

    igt@pm_rc6_residency@rc6-accuracy:
      shard-snb:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9325_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_suspend@shrink:
      shard-snb:          PASS -> FAIL (fdo#106886)

    igt@gem_exec_suspend@basic-s3:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665)

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
      shard-glk:          PASS -> FAIL (fdo#105703)

    igt@kms_cursor_crc@cursor-64x64-suspend:
      shard-snb:          PASS -> DMESG-WARN (fdo#102365)

    igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
      shard-glk:          PASS -> FAIL (fdo#105363)

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#103822, fdo#104724) +1

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      shard-snb:          PASS -> INCOMPLETE (fdo#105411)

    
    ==== Possible fixes ====

    igt@gem_ppgtt@blt-vs-render-ctx0:
      shard-kbl:          INCOMPLETE (fdo#106023, fdo#103665) -> PASS

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking-fencing:
      shard-glk:          FAIL (fdo#105703) -> PASS

    igt@kms_flip_tiling@flip-y-tiled:
      shard-glk:          FAIL (fdo#103822, fdo#104724) -> PASS

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS

    
  fdo#102365 https://bugs.freedesktop.org/show_bug.cgi?id=102365
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4323 -> Patchwork_9325

  CI_DRM_4323: 25d3805133071406ffae77c994f464dbbb3bb34e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4519: 3381a56be31defb3b5c23a4fbc19ac26a000c35b @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9325: 1cdd28f59475c5776076afbbfeca9c72c7f94328 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9325/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-06-15 18:20 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-15 10:18 [PATCH] drm/i915: Fix context ban and hang accounting for client Mika Kuoppala
2018-06-15 10:28 ` Chris Wilson
2018-06-15 13:41   ` Mika Kuoppala
2018-06-15 10:44 ` Mika Kuoppala
2018-06-15 10:44 ` ✗ Fi.CI.SPARSE: warning for " Patchwork
2018-06-15 11:03 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-15 11:47 ` ✗ Fi.CI.SPARSE: warning for drm/i915: Fix context ban and hang accounting for client (rev2) Patchwork
2018-06-15 12:06 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-15 18:20 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.