All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
@ 2017-10-26 14:01 Mika Kuoppala
  2017-10-26 14:24 ` ✗ Fi.CI.BAT: failure for " Patchwork
                   ` (6 more replies)
  0 siblings, 7 replies; 13+ messages in thread
From: Mika Kuoppala @ 2017-10-26 14:01 UTC (permalink / raw)
  To: intel-gfx; +Cc: Rodrigo Vivi

There is a possibility on gen9 hardware to miss the forcewake ack
message. The recommended workaround is to use another free
bit and toggle it until original bit is successfully acknowledged.

The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck.
The reason for naming is most likely that workaround was named before
the most recent recommendation evolved to use the reserve bit.

Some future gen9 revs might or might not fix the underlying issue but
the fallback to reserve bit dance can be considered as harmless:
without the ack timeout we never reach the reserve bit forcewake.
Thus as of now we adopt a blanket approach for all gen9 and leave
the bypassing the reserve bit approach for future patches if
corresponding hw revisions do appear.

Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
for forcewake request/clear ack") did increase the forcewake timeout.
If the issue was a delayed ack, future work could include finding
a suitable timeout value both for primary ack and reserve toggle
to reduce the worst case latency.

References: HSDES #1604254524
References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |   5 +-
 drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++---
 2 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f138eae82bf0..c04c634af5ae 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7774,8 +7774,9 @@ enum {
 #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
 #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
 #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
-#define   FORCEWAKE_KERNEL			0x1
-#define   FORCEWAKE_USER			0x2
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_RESERVE			BIT(12)
 #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
 #define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 20e3c65c0999..fc6d090244c5 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 			       HRTIMER_MODE_REL);
 }
 
+static inline bool
+wait_ack_clear(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack)
+{
+	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0,
+			       FORCEWAKE_ACK_TIMEOUT_MS);
+}
+
+static inline bool
+wait_ack_set(const struct drm_i915_private *i915,
+	     const struct intel_uncore_forcewake_domain *d,
+	     const u32 ack)
+{
+	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack),
+			       FORCEWAKE_ACK_TIMEOUT_MS);
+}
+
 static inline void
 fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
 			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
+enum ack_type {
+	ACK_CLEAR = 0,
+	ACK_SET
+};
+
+static bool
+fw_domain_reserve_fallback(const struct drm_i915_private *i915,
+			   const struct intel_uncore_forcewake_domain *d,
+			   const enum ack_type type)
+{
+	bool timeout;
+	int retry = 10;
+
+	/* Fallback to toggle another fw bit to wake up the gpu */
+	do {
+		wait_ack_clear(i915, d, FORCEWAKE_RESERVE);
+		__raw_i915_write32(i915, d->reg_set,
+				   _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE));
+		wait_ack_set(i915, d, FORCEWAKE_RESERVE);
+
+		if (type == ACK_CLEAR)
+			timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
+		else
+			timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
+
+		__raw_i915_write32(i915, d->reg_set,
+				   _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE));
+	} while (timeout && --retry);
+
+	return timeout;
+}
+
+static inline void
+fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915,
+				 const struct intel_uncore_forcewake_domain *d)
+{
+	bool timeout;
+
+	timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
+	if (likely(!timeout))
+		return;
+
+	timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR);
+	if (timeout)
+		fw_domain_wait_ack_clear(i915, d);
+}
+
 static inline void
 fw_domain_get(struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
@@ -91,14 +154,27 @@ static inline void
 fw_domain_wait_ack(const struct drm_i915_private *i915,
 		   const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
 static inline void
+fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915,
+			       const struct intel_uncore_forcewake_domain *d)
+{
+	bool timeout;
+
+	timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
+	if (likely(!timeout))
+		return;
+
+	timeout = fw_domain_reserve_fallback(i915, d, ACK_SET);
+	if (timeout)
+		fw_domain_wait_ack(i915, d);
+}
+
+static inline void
 fw_domain_put(const struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
 {
@@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 }
 
 static void
+fw_domains_get_with_reserve(struct drm_i915_private *i915,
+			    enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
+
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear_reserve(i915, d);
+		fw_domain_get(i915, d);
+	}
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack_set_reserve(i915, d);
+
+	i915->uncore.fw_domains_active |= fw_domains;
+}
+
+static void
 fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
@@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		/* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_reserve;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
@ 2017-10-26 14:24 ` Patchwork
  2017-10-26 14:34 ` [PATCH] " Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2017-10-26 14:24 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fallback to reserve forcewake if primary ack missing
URL   : https://patchwork.freedesktop.org/series/32694/
State : failure

== Summary ==

Series 32694v1 drm/i915: Fallback to reserve forcewake if primary ack missing
https://patchwork.freedesktop.org/api/1.0/series/32694/revisions/1/mbox/

Test kms_frontbuffer_tracking:
        Subgroup basic:
                pass       -> DMESG-WARN (fi-bdw-5557u) fdo#102473
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b-frame-sequence:
                skip       -> PASS       (fi-hsw-4770r) fdo#102332
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-j1900) fdo#101705
                pass       -> INCOMPLETE (fi-cnl-y)

fdo#102473 https://bugs.freedesktop.org/show_bug.cgi?id=102473
fdo#102332 https://bugs.freedesktop.org/show_bug.cgi?id=102332
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:289  pass:267  dwarn:1   dfail:0   fail:0   skip:21  time:443s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:454s
fi-blb-e6850     total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:368s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:517s
fi-bwr-2160      total:289  pass:183  dwarn:0   dfail:0   fail:0   skip:106 time:263s
fi-bxt-dsi       total:289  pass:259  dwarn:0   dfail:0   fail:0   skip:30  time:495s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:493s
fi-byt-j1900     total:289  pass:253  dwarn:1   dfail:0   fail:0   skip:35  time:493s
fi-byt-n2820     total:289  pass:249  dwarn:1   dfail:0   fail:0   skip:39  time:484s
fi-cfl-s         total:289  pass:253  dwarn:4   dfail:0   fail:0   skip:32  time:548s
fi-cnl-y         total:246  pass:221  dwarn:0   dfail:0   fail:0   skip:24 
fi-elk-e7500     total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:412s
fi-gdg-551       total:289  pass:178  dwarn:1   dfail:0   fail:1   skip:109 time:251s
fi-glk-1         total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:586s
fi-glk-dsi       total:289  pass:258  dwarn:0   dfail:0   fail:1   skip:30  time:485s
fi-hsw-4770      total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:431s
fi-hsw-4770r     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:425s
fi-ilk-650       total:289  pass:228  dwarn:0   dfail:0   fail:0   skip:61  time:429s
fi-ivb-3520m     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:491s
fi-ivb-3770      total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:459s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:489s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:568s
fi-kbl-7567u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:476s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:584s
fi-pnv-d510      total:289  pass:222  dwarn:1   dfail:0   fail:0   skip:66  time:544s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:452s
fi-skl-6600u     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:586s
fi-skl-6700hq    total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:649s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:521s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:504s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:455s
fi-snb-2520m     total:289  pass:250  dwarn:0   dfail:0   fail:0   skip:39  time:559s
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:0   skip:40  time:418s

df3033b174059a59aa0c890f81de8af037abd11f drm-tip: 2017y-10m-26d-11h-03m-59s UTC integration manifest
85d6d3a5e337 drm/i915: Fallback to reserve forcewake if primary ack missing

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6206/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
  2017-10-26 14:24 ` ✗ Fi.CI.BAT: failure for " Patchwork
@ 2017-10-26 14:34 ` Chris Wilson
  2017-10-27  6:34   ` Mika Kuoppala
  2017-10-27 14:04 ` Mika Kuoppala
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2017-10-26 14:34 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Rodrigo Vivi

Quoting Mika Kuoppala (2017-10-26 15:01:44)
> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
> 
> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck.
> The reason for naming is most likely that workaround was named before
> the most recent recommendation evolved to use the reserve bit.
> 
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
> 
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
> 
> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>  drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++---
>  2 files changed, 108 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f138eae82bf0..c04c634af5ae 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
>  #define  FORCEWAKE_ACK_MEDIA_GEN9              _MMIO(0x0D88)
>  #define  FORCEWAKE_ACK_RENDER_GEN9             _MMIO(0x0D84)
>  #define  FORCEWAKE_ACK_BLITTER_GEN9            _MMIO(0x130044)
> -#define   FORCEWAKE_KERNEL                     0x1
> -#define   FORCEWAKE_USER                       0x2
> +#define   FORCEWAKE_KERNEL                     BIT(0)
> +#define   FORCEWAKE_USER                       BIT(1)
> +#define   FORCEWAKE_RESERVE                    BIT(12)

Why 12? How about 15?

FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK

Reserved tends to imply future hw bits. So I think s/reserve/fallback/
works throughout the patch.

>  #define  FORCEWAKE_MT_ACK                      _MMIO(0x130040)
>  #define  ECOBUS                                        _MMIO(0xa180)
>  #define    FORCEWAKE_MT_ENABLE                 (1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 20e3c65c0999..fc6d090244c5 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>                                HRTIMER_MODE_REL);
>  }
>  
> +static inline bool
> +wait_ack_clear(const struct drm_i915_private *i915,
> +              const struct intel_uncore_forcewake_domain *d,
> +              const u32 ack)
> +{
> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0,
> +                              FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +static inline bool
> +wait_ack_set(const struct drm_i915_private *i915,
> +            const struct intel_uncore_forcewake_domain *d,
> +            const u32 ack)
> +{
> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack),
> +                              FORCEWAKE_ACK_TIMEOUT_MS);
> +}

Let's make these one function, to cut down on the wait_for() expansions.

Keeping the wait_ack_clear/wait_ack_set wrappers.

static inline int
__wait_for_ack(i915, d, ack, value)
{
	return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value,
				FORCEWAKE_ACK_TIMEOUT_MS);
}

static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0));
static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack));

>  static inline void
>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>                          const struct intel_uncore_forcewake_domain *d)
>  {
> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -                            FORCEWAKE_KERNEL) == 0,
> -                           FORCEWAKE_ACK_TIMEOUT_MS))
> +       if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>                 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>                           intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
> +enum ack_type {
> +       ACK_CLEAR = 0,
> +       ACK_SET
> +};
> +
> +static bool
> +fw_domain_reserve_fallback(const struct drm_i915_private *i915,
> +                          const struct intel_uncore_forcewake_domain *d,
> +                          const enum ack_type type)
> +{
> +       bool timeout;
> +       int retry = 10;
> +
> +       /* Fallback to toggle another fw bit to wake up the gpu */

The comment needs some work; the first bit itself was meant to wake the
gpu, so why is this bit any more likely to work?

> +       do {
> +               wait_ack_clear(i915, d, FORCEWAKE_RESERVE);
> +               __raw_i915_write32(i915, d->reg_set,
> +                                  _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE));
> +               wait_ack_set(i915, d, FORCEWAKE_RESERVE);
> +
> +               if (type == ACK_CLEAR)
> +                       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
> +               else
> +                       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
> +
> +               __raw_i915_write32(i915, d->reg_set,
> +                                  _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE));

Blindly toggling another bit to encourage the first to succeed.

Why stop at 1 extra bit????

If the wait_ack_set(RSVD) fails, you might as well abort?

Other than my face hitting the desk, the logic makes sense to me:

	Assert second fw bit; check original bit; clear second fw bit (so we
	don't keep fw longer than required).

This basically also has the side-effect of making the timeout 12x
longer.

> +       } while (timeout && --retry);
> +
> +       return timeout;
> +}
> +
> +static inline void
> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915,
> +                                const struct intel_uncore_forcewake_domain *d)
> +{
> +       bool timeout;
bool err; or none at all.

timeout tends to be a value we pass around telling us how long until the
timeout; not the status, which would be timedout.

> +
> +       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
> +       if (likely(!timeout))
> +               return;
> +
> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR);
> +       if (timeout)
> +               fw_domain_wait_ack_clear(i915, d);

Ok, one last try and raise an error if it fails.

> +}
> +
>  static inline void
>  fw_domain_get(struct drm_i915_private *i915,
>               const struct intel_uncore_forcewake_domain *d)
> @@ -91,14 +154,27 @@ static inline void
>  fw_domain_wait_ack(const struct drm_i915_private *i915,
>                    const struct intel_uncore_forcewake_domain *d)
>  {
> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -                            FORCEWAKE_KERNEL),
> -                           FORCEWAKE_ACK_TIMEOUT_MS))
> +       if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
>                 DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
>                           intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
>  static inline void
> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915,
> +                              const struct intel_uncore_forcewake_domain *d)
> +{
> +       bool timeout;
> +
> +       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
> +       if (likely(!timeout))
> +               return;
> +
> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_SET);
> +       if (timeout)
> +               fw_domain_wait_ack(i915, d);
> +}
> +
> +static inline void
>  fw_domain_put(const struct drm_i915_private *i915,
>               const struct intel_uncore_forcewake_domain *d)
>  {
> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>  }
>  
>  static void
> +fw_domains_get_with_reserve(struct drm_i915_private *i915,
> +                           enum forcewake_domains fw_domains)
> +{
> +       struct intel_uncore_forcewake_domain *d;
> +       unsigned int tmp;
> +
> +       GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
> +
> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
> +               fw_domain_wait_ack_clear_reserve(i915, d);
> +               fw_domain_get(i915, d);
> +       }
> +
> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> +               fw_domain_wait_ack_set_reserve(i915, d);
> +
> +       i915->uncore.fw_domains_active |= fw_domains;

Ok.

> +}
> +
> +static void
>  fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>  {
>         struct intel_uncore_forcewake_domain *d;
> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>         }
>  
>         if (INTEL_GEN(dev_priv) >= 9) {
> -               dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
> +               /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */
> +               dev_priv->uncore.funcs.force_wake_get =
> +                       fw_domains_get_with_reserve;
>                 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>                 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>                                FORCEWAKE_RENDER_GEN9,
> -- 
> 2.11.0
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-26 14:34 ` [PATCH] " Chris Wilson
@ 2017-10-27  6:34   ` Mika Kuoppala
  0 siblings, 0 replies; 13+ messages in thread
From: Mika Kuoppala @ 2017-10-27  6:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Rodrigo Vivi

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2017-10-26 15:01:44)
>> There is a possibility on gen9 hardware to miss the forcewake ack
>> message. The recommended workaround is to use another free
>> bit and toggle it until original bit is successfully acknowledged.
>> 
>> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck.
>> The reason for naming is most likely that workaround was named before
>> the most recent recommendation evolved to use the reserve bit.
>> 
>> Some future gen9 revs might or might not fix the underlying issue but
>> the fallback to reserve bit dance can be considered as harmless:
>> without the ack timeout we never reach the reserve bit forcewake.
>> Thus as of now we adopt a blanket approach for all gen9 and leave
>> the bypassing the reserve bit approach for future patches if
>> corresponding hw revisions do appear.
>> 
>> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
>> for forcewake request/clear ack") did increase the forcewake timeout.
>> If the issue was a delayed ack, future work could include finding
>> a suitable timeout value both for primary ack and reserve toggle
>> to reduce the worst case latency.
>> 
>> References: HSDES #1604254524
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>>  drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++---
>>  2 files changed, 108 insertions(+), 9 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index f138eae82bf0..c04c634af5ae 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7774,8 +7774,9 @@ enum {
>>  #define  FORCEWAKE_ACK_MEDIA_GEN9              _MMIO(0x0D88)
>>  #define  FORCEWAKE_ACK_RENDER_GEN9             _MMIO(0x0D84)
>>  #define  FORCEWAKE_ACK_BLITTER_GEN9            _MMIO(0x130044)
>> -#define   FORCEWAKE_KERNEL                     0x1
>> -#define   FORCEWAKE_USER                       0x2
>> +#define   FORCEWAKE_KERNEL                     BIT(0)
>> +#define   FORCEWAKE_USER                       BIT(1)
>> +#define   FORCEWAKE_RESERVE                    BIT(12)
>
> Why 12? How about 15?
>

I just mimiced the pseudocode from an errata sheet slavishly.

AFAIK 15 should work as well as 12 so 15.

> FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK

KERNEL2 would imply similar mechanism. Fallback I like more
as this is just under the hood toggling for waking up
the real bit.

>
> Reserved tends to imply future hw bits. So I think s/reserve/fallback/
> works throughout the patch.
>

Yup, fallback is better.

>>  #define  FORCEWAKE_MT_ACK                      _MMIO(0x130040)
>>  #define  ECOBUS                                        _MMIO(0xa180)
>>  #define    FORCEWAKE_MT_ENABLE                 (1<<5)
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 20e3c65c0999..fc6d090244c5 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>>                                HRTIMER_MODE_REL);
>>  }
>>  
>> +static inline bool
>> +wait_ack_clear(const struct drm_i915_private *i915,
>> +              const struct intel_uncore_forcewake_domain *d,
>> +              const u32 ack)
>> +{
>> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0,
>> +                              FORCEWAKE_ACK_TIMEOUT_MS);
>> +}
>> +
>> +static inline bool
>> +wait_ack_set(const struct drm_i915_private *i915,
>> +            const struct intel_uncore_forcewake_domain *d,
>> +            const u32 ack)
>> +{
>> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack),
>> +                              FORCEWAKE_ACK_TIMEOUT_MS);
>> +}
>
> Let's make these one function, to cut down on the wait_for() expansions.
>
> Keeping the wait_ack_clear/wait_ack_set wrappers.
>
> static inline int
> __wait_for_ack(i915, d, ack, value)
> {
> 	return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value,
> 				FORCEWAKE_ACK_TIMEOUT_MS);
> }
>
> static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0));
> static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack));
>
>>  static inline void
>>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>>                          const struct intel_uncore_forcewake_domain *d)
>>  {
>> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
>> -                            FORCEWAKE_KERNEL) == 0,
>> -                           FORCEWAKE_ACK_TIMEOUT_MS))
>> +       if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>>                 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>>                           intel_uncore_forcewake_domain_to_str(d->id));
>>  }
>>  
>> +enum ack_type {
>> +       ACK_CLEAR = 0,
>> +       ACK_SET
>> +};
>> +
>> +static bool
>> +fw_domain_reserve_fallback(const struct drm_i915_private *i915,
>> +                          const struct intel_uncore_forcewake_domain *d,
>> +                          const enum ack_type type)
>> +{
>> +       bool timeout;
>> +       int retry = 10;
>> +
>> +       /* Fallback to toggle another fw bit to wake up the gpu */
>
> The comment needs some work; the first bit itself was meant to wake the
> gpu, so why is this bit any more likely to work?
>

Yeah, this needs rework. It is more like toggling an unrelated bit
will make the 'lost or pending' ack message for the KERNEL bit to
materialize.

I thought that should we also retoggle the KERNEL one but
again, this patch mimics the pseudocode I found 1:1. There
are hints that both delaying wait and a regtogging of the
original bit was used at some point of time as a workaround.

This is _supposed_ to be the latest recommendation to combat
the issue. Efforts and success to reproduce would give us
venue to measure if it is the best known.

>> +       do {
>> +               wait_ack_clear(i915, d, FORCEWAKE_RESERVE);
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE));
>> +               wait_ack_set(i915, d, FORCEWAKE_RESERVE);
>> +
>> +               if (type == ACK_CLEAR)
>> +                       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
>> +               else
>> +                       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
>> +
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE));
>
> Blindly toggling another bit to encourage the first to succeed.
>
> Why stop at 1 extra bit????

Again 1:1 mimic of pseudocode.

What we could do instead of this toggle hammering, is to go
through all unreserved bits until one ack appears on any and
be happy.

>
> If the wait_ack_set(RSVD) fails, you might as well abort?
>

For me it looks like this toggle hammering will kick
the state machine to spit out the first ack message
that was somehow lost. I did again follow the
recomennded procedure precisely.

> Other than my face hitting the desk, the logic makes sense to me:
>
> 	Assert second fw bit; check original bit; clear second fw bit (so we
> 	don't keep fw longer than required).
>
> This basically also has the side-effect of making the timeout 12x
> longer.
>

In the worst case yes. But the first toggle should already untangle
the mess. Probability that we end up retrying all the way through
is very unlikely. Now that I have said it, it will happen :)

Thank you for the comments.

-Mika

>> +       } while (timeout && --retry);
>> +
>> +       return timeout;
>> +}
>> +
>> +static inline void
>> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915,
>> +                                const struct intel_uncore_forcewake_domain *d)
>> +{
>> +       bool timeout;
> bool err; or none at all.
>
> timeout tends to be a value we pass around telling us how long until the
> timeout; not the status, which would be timedout.
>
>> +
>> +       timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL);
>> +       if (likely(!timeout))
>> +               return;
>> +
>> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR);
>> +       if (timeout)
>> +               fw_domain_wait_ack_clear(i915, d);
>
> Ok, one last try and raise an error if it fails.
>
>> +}
>> +
>>  static inline void
>>  fw_domain_get(struct drm_i915_private *i915,
>>               const struct intel_uncore_forcewake_domain *d)
>> @@ -91,14 +154,27 @@ static inline void
>>  fw_domain_wait_ack(const struct drm_i915_private *i915,
>>                    const struct intel_uncore_forcewake_domain *d)
>>  {
>> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
>> -                            FORCEWAKE_KERNEL),
>> -                           FORCEWAKE_ACK_TIMEOUT_MS))
>> +       if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
>>                 DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
>>                           intel_uncore_forcewake_domain_to_str(d->id));
>>  }
>>  
>>  static inline void
>> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915,
>> +                              const struct intel_uncore_forcewake_domain *d)
>> +{
>> +       bool timeout;
>> +
>> +       timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL);
>> +       if (likely(!timeout))
>> +               return;
>> +
>> +       timeout = fw_domain_reserve_fallback(i915, d, ACK_SET);
>> +       if (timeout)
>> +               fw_domain_wait_ack(i915, d);
>> +}
>> +
>> +static inline void
>>  fw_domain_put(const struct drm_i915_private *i915,
>>               const struct intel_uncore_forcewake_domain *d)
>>  {
>> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>>  }
>>  
>>  static void
>> +fw_domains_get_with_reserve(struct drm_i915_private *i915,
>> +                           enum forcewake_domains fw_domains)
>> +{
>> +       struct intel_uncore_forcewake_domain *d;
>> +       unsigned int tmp;
>> +
>> +       GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
>> +
>> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
>> +               fw_domain_wait_ack_clear_reserve(i915, d);
>> +               fw_domain_get(i915, d);
>> +       }
>> +
>> +       for_each_fw_domain_masked(d, fw_domains, i915, tmp)
>> +               fw_domain_wait_ack_set_reserve(i915, d);
>> +
>> +       i915->uncore.fw_domains_active |= fw_domains;
>
> Ok.
>
>> +}
>> +
>> +static void
>>  fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>>  {
>>         struct intel_uncore_forcewake_domain *d;
>> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>>         }
>>  
>>         if (INTEL_GEN(dev_priv) >= 9) {
>> -               dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
>> +               /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */
>> +               dev_priv->uncore.funcs.force_wake_get =
>> +                       fw_domains_get_with_reserve;
>>                 dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>>                 fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>>                                FORCEWAKE_RENDER_GEN9,
>> -- 
>> 2.11.0
>> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
  2017-10-26 14:24 ` ✗ Fi.CI.BAT: failure for " Patchwork
  2017-10-26 14:34 ` [PATCH] " Chris Wilson
@ 2017-10-27 14:04 ` Mika Kuoppala
  2017-10-27 14:08   ` Mika Kuoppala
  2017-10-27 14:38   ` Chris Wilson
  2017-10-27 14:38 ` ✓ Fi.CI.BAT: success for drm/i915: Fallback to reserve forcewake if primary ack missing (rev2) Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 13+ messages in thread
From: Mika Kuoppala @ 2017-10-27 14:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: Rodrigo Vivi

There is a possibility on gen9 hardware to miss the forcewake ack
message. The recommended workaround is to use another free
bit and toggle it until original bit is successfully acknowledged.

Some future gen9 revs might or might not fix the underlying issue but
the fallback to reserve bit dance can be considered as harmless:
without the ack timeout we never reach the reserve bit forcewake.
Thus as of now we adopt a blanket approach for all gen9 and leave
the bypassing the reserve bit approach for future patches if
corresponding hw revisions do appear.

Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
for forcewake request/clear ack") did increase the forcewake timeout.
If the issue was a delayed ack, future work could include finding
a suitable timeout value both for primary ack and reserve toggle
to reduce the worst case latency.

v2: use bit 15, naming, comment (Chris), only wait fallback ack

References: HSDES #1604254524
References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |   5 +-
 drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
 2 files changed, 134 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8c775e96b4e4..f0f8f6059652 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7774,8 +7774,9 @@ enum {
 #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
 #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
 #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
-#define   FORCEWAKE_KERNEL			0x1
-#define   FORCEWAKE_USER			0x2
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
 #define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 96ee6b2754be..588ae35c8c9a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -69,17 +69,108 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 			       HRTIMER_MODE_REL);
 }
 
+static inline int
+__wait_for_ack(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack,
+	       const u32 value)
+{
+	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
+			       FORCEWAKE_ACK_TIMEOUT_MS);
+}
+
+
+static inline int
+wait_ack_clear(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, 0);
+}
+
+static inline int
+wait_ack_set(const struct drm_i915_private *i915,
+	     const struct intel_uncore_forcewake_domain *d,
+	     const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, ack);
+}
+
 static inline void
 fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
 			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
+static int
+wait_ack_with_fallback(const struct drm_i915_private *i915,
+		       const struct intel_uncore_forcewake_domain *d,
+		       const u32 ack,
+		       const u32 value)
+{
+	int ret;
+
+	/*
+	 * There is a possibility of driver's wake request colliding
+	 * with hardware's own wake requests and that can cause
+	 * hardware to not deliver the driver's ack message.
+	 *
+	 * Use a fallback bit toggle to kick the gpu state machine
+	 * in hopes that the original ack will be delivered along with
+	 * the fallback ack.
+	 *
+	 * This workaround is described in HSDES #1604254524
+	 */
+
+	wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+	__raw_i915_write32(i915, d->reg_set,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
+	wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+
+	ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
+
+	__raw_i915_write32(i915, d->reg_set,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
+
+	return ret;
+}
+
+enum ack_type {
+	ACK_CLEAR = 0,
+	ACK_SET
+};
+
+static int
+fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
+				 const struct intel_uncore_forcewake_domain *d,
+				 const enum ack_type type)
+{
+	int retry = 10;
+	int ret;
+
+	do {
+		ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
+					     type == ACK_SET ?
+					     FORCEWAKE_KERNEL : 0);
+	} while (ret && --retry);
+
+	return ret;
+}
+
+static inline void
+fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915,
+				  const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR))
+		fw_domain_wait_ack_clear(i915, d);
+}
+
 static inline void
 fw_domain_get(struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
@@ -88,17 +179,26 @@ fw_domain_get(struct drm_i915_private *i915,
 }
 
 static inline void
-fw_domain_wait_ack(const struct drm_i915_private *i915,
-		   const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack_set(const struct drm_i915_private *i915,
+		       const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
 static inline void
+fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915,
+				const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET))
+		fw_domain_wait_ack_set(i915, d);
+}
+
+static inline void
 fw_domain_put(const struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
 {
@@ -119,7 +219,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 	}
 
 	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
-		fw_domain_wait_ack(i915, d);
+		fw_domain_wait_ack_set(i915, d);
+
+	i915->uncore.fw_domains_active |= fw_domains;
+}
+
+static void
+fw_domains_get_with_fallback(struct drm_i915_private *i915,
+			     enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
+
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear_fallback(i915, d);
+		fw_domain_get(i915, d);
+	}
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack_set_fallback(i915, d);
 
 	i915->uncore.fw_domains_active |= fw_domains;
 }
@@ -1142,7 +1262,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_fallback;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-27 14:04 ` Mika Kuoppala
@ 2017-10-27 14:08   ` Mika Kuoppala
  2017-10-27 14:38   ` Chris Wilson
  1 sibling, 0 replies; 13+ messages in thread
From: Mika Kuoppala @ 2017-10-27 14:08 UTC (permalink / raw)
  To: intel-gfx; +Cc: Rodrigo Vivi

Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
>
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
>
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
>
> v2: use bit 15, naming, comment (Chris), only wait fallback ack
>

And then I forgot to change the naming in the subject and
commit message. Sigh
-Mika


> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>  drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
>  2 files changed, 134 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8c775e96b4e4..f0f8f6059652 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
>  #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
>  #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
>  #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
> -#define   FORCEWAKE_KERNEL			0x1
> -#define   FORCEWAKE_USER			0x2
> +#define   FORCEWAKE_KERNEL			BIT(0)
> +#define   FORCEWAKE_USER			BIT(1)
> +#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
>  #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
>  #define  ECOBUS					_MMIO(0xa180)
>  #define    FORCEWAKE_MT_ENABLE			(1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 96ee6b2754be..588ae35c8c9a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,108 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>  			       HRTIMER_MODE_REL);
>  }
>  
> +static inline int
> +__wait_for_ack(const struct drm_i915_private *i915,
> +	       const struct intel_uncore_forcewake_domain *d,
> +	       const u32 ack,
> +	       const u32 value)
> +{
> +	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
> +			       FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +
> +static inline int
> +wait_ack_clear(const struct drm_i915_private *i915,
> +	       const struct intel_uncore_forcewake_domain *d,
> +	       const u32 ack)
> +{
> +	return __wait_for_ack(i915, d, ack, 0);
> +}
> +
> +static inline int
> +wait_ack_set(const struct drm_i915_private *i915,
> +	     const struct intel_uncore_forcewake_domain *d,
> +	     const u32 ack)
> +{
> +	return __wait_for_ack(i915, d, ack, ack);
> +}
> +
>  static inline void
>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>  			 const struct intel_uncore_forcewake_domain *d)
>  {
> -	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -			     FORCEWAKE_KERNEL) == 0,
> -			    FORCEWAKE_ACK_TIMEOUT_MS))
> +	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>  		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>  			  intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
> +static int
> +wait_ack_with_fallback(const struct drm_i915_private *i915,
> +		       const struct intel_uncore_forcewake_domain *d,
> +		       const u32 ack,
> +		       const u32 value)
> +{
> +	int ret;
> +
> +	/*
> +	 * There is a possibility of driver's wake request colliding
> +	 * with hardware's own wake requests and that can cause
> +	 * hardware to not deliver the driver's ack message.
> +	 *
> +	 * Use a fallback bit toggle to kick the gpu state machine
> +	 * in hopes that the original ack will be delivered along with
> +	 * the fallback ack.
> +	 *
> +	 * This workaround is described in HSDES #1604254524
> +	 */
> +
> +	wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +	__raw_i915_write32(i915, d->reg_set,
> +			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
> +	wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +
> +	ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
> +
> +	__raw_i915_write32(i915, d->reg_set,
> +			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
> +
> +	return ret;
> +}
> +
> +enum ack_type {
> +	ACK_CLEAR = 0,
> +	ACK_SET
> +};
> +
> +static int
> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
> +				 const struct intel_uncore_forcewake_domain *d,
> +				 const enum ack_type type)
> +{
> +	int retry = 10;
> +	int ret;
> +
> +	do {
> +		ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
> +					     type == ACK_SET ?
> +					     FORCEWAKE_KERNEL : 0);
> +	} while (ret && --retry);
> +
> +	return ret;
> +}
> +
> +static inline void
> +fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915,
> +				  const struct intel_uncore_forcewake_domain *d)
> +{
> +	if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL)))
> +		return;
> +
> +	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR))
> +		fw_domain_wait_ack_clear(i915, d);
> +}
> +
>  static inline void
>  fw_domain_get(struct drm_i915_private *i915,
>  	      const struct intel_uncore_forcewake_domain *d)
> @@ -88,17 +179,26 @@ fw_domain_get(struct drm_i915_private *i915,
>  }
>  
>  static inline void
> -fw_domain_wait_ack(const struct drm_i915_private *i915,
> -		   const struct intel_uncore_forcewake_domain *d)
> +fw_domain_wait_ack_set(const struct drm_i915_private *i915,
> +		       const struct intel_uncore_forcewake_domain *d)
>  {
> -	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -			     FORCEWAKE_KERNEL),
> -			    FORCEWAKE_ACK_TIMEOUT_MS))
> +	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
>  		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
>  			  intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
>  static inline void
> +fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915,
> +				const struct intel_uncore_forcewake_domain *d)
> +{
> +	if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL)))
> +		return;
> +
> +	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET))
> +		fw_domain_wait_ack_set(i915, d);
> +}
> +
> +static inline void
>  fw_domain_put(const struct drm_i915_private *i915,
>  	      const struct intel_uncore_forcewake_domain *d)
>  {
> @@ -119,7 +219,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>  	}
>  
>  	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> -		fw_domain_wait_ack(i915, d);
> +		fw_domain_wait_ack_set(i915, d);
> +
> +	i915->uncore.fw_domains_active |= fw_domains;
> +}
> +
> +static void
> +fw_domains_get_with_fallback(struct drm_i915_private *i915,
> +			     enum forcewake_domains fw_domains)
> +{
> +	struct intel_uncore_forcewake_domain *d;
> +	unsigned int tmp;
> +
> +	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
> +
> +	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
> +		fw_domain_wait_ack_clear_fallback(i915, d);
> +		fw_domain_get(i915, d);
> +	}
> +
> +	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> +		fw_domain_wait_ack_set_fallback(i915, d);
>  
>  	i915->uncore.fw_domains_active |= fw_domains;
>  }
> @@ -1142,7 +1262,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>  	}
>  
>  	if (INTEL_GEN(dev_priv) >= 9) {
> -		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
> +		dev_priv->uncore.funcs.force_wake_get =
> +			fw_domains_get_with_fallback;
>  		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>  		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>  			       FORCEWAKE_RENDER_GEN9,
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Fallback to reserve forcewake if primary ack missing (rev2)
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
                   ` (2 preceding siblings ...)
  2017-10-27 14:04 ` Mika Kuoppala
@ 2017-10-27 14:38 ` Patchwork
  2017-10-27 16:07 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2017-10-27 14:38 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fallback to reserve forcewake if primary ack missing (rev2)
URL   : https://patchwork.freedesktop.org/series/32694/
State : success

== Summary ==

Series 32694v2 drm/i915: Fallback to reserve forcewake if primary ack missing
https://patchwork.freedesktop.org/api/1.0/series/32694/revisions/2/mbox/

Test gem_exec_reloc:
        Subgroup basic-cpu-gtt-active:
                pass       -> FAIL       (fi-gdg-551) fdo#102582
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-legacy:
                fail       -> PASS       (fi-gdg-551) fdo#102618

fdo#102582 https://bugs.freedesktop.org/show_bug.cgi?id=102582
fdo#102618 https://bugs.freedesktop.org/show_bug.cgi?id=102618

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:450s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:451s
fi-blb-e6850     total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:374s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:527s
fi-bwr-2160      total:289  pass:183  dwarn:0   dfail:0   fail:0   skip:106 time:267s
fi-bxt-dsi       total:289  pass:259  dwarn:0   dfail:0   fail:0   skip:30  time:499s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:489s
fi-byt-j1900     total:289  pass:253  dwarn:1   dfail:0   fail:0   skip:35  time:491s
fi-byt-n2820     total:289  pass:249  dwarn:1   dfail:0   fail:0   skip:39  time:477s
fi-cnl-y         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:606s
fi-elk-e7500     total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:418s
fi-gdg-551       total:289  pass:177  dwarn:1   dfail:0   fail:2   skip:109 time:250s
fi-glk-1         total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:579s
fi-glk-dsi       total:289  pass:258  dwarn:0   dfail:0   fail:1   skip:30  time:488s
fi-hsw-4770      total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:428s
fi-hsw-4770r     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:430s
fi-ilk-650       total:289  pass:228  dwarn:0   dfail:0   fail:0   skip:61  time:419s
fi-ivb-3520m     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:494s
fi-ivb-3770      total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:460s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:491s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:574s
fi-kbl-7567u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:480s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:584s
fi-pnv-d510      total:289  pass:222  dwarn:1   dfail:0   fail:0   skip:66  time:543s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:447s
fi-skl-6600u     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:586s
fi-skl-6700hq    total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:644s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:523s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:497s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:453s
fi-snb-2520m     total:289  pass:250  dwarn:0   dfail:0   fail:0   skip:39  time:561s
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:0   skip:40  time:415s

795d258e67f403ff77d1f14b37551e98163f5b4e drm-tip: 2017y-10m-27d-11h-42m-39s UTC integration manifest
23f260bbcc5e drm/i915: Fallback to reserve forcewake if primary ack missing

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6235/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-27 14:04 ` Mika Kuoppala
  2017-10-27 14:08   ` Mika Kuoppala
@ 2017-10-27 14:38   ` Chris Wilson
  1 sibling, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2017-10-27 14:38 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Rodrigo Vivi

Quoting Mika Kuoppala (2017-10-27 15:04:21)
> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
> 
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
> 
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
> 
> v2: use bit 15, naming, comment (Chris), only wait fallback ack
> 
> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>  drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
>  2 files changed, 134 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8c775e96b4e4..f0f8f6059652 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
>  #define  FORCEWAKE_ACK_MEDIA_GEN9              _MMIO(0x0D88)
>  #define  FORCEWAKE_ACK_RENDER_GEN9             _MMIO(0x0D84)
>  #define  FORCEWAKE_ACK_BLITTER_GEN9            _MMIO(0x130044)
> -#define   FORCEWAKE_KERNEL                     0x1
> -#define   FORCEWAKE_USER                       0x2
> +#define   FORCEWAKE_KERNEL                     BIT(0)
> +#define   FORCEWAKE_USER                       BIT(1)
> +#define   FORCEWAKE_KERNEL_FALLBACK            BIT(15)
>  #define  FORCEWAKE_MT_ACK                      _MMIO(0x130040)
>  #define  ECOBUS                                        _MMIO(0xa180)
>  #define    FORCEWAKE_MT_ENABLE                 (1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 96ee6b2754be..588ae35c8c9a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,108 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>                                HRTIMER_MODE_REL);
>  }
>  
> +static inline int
> +__wait_for_ack(const struct drm_i915_private *i915,
> +              const struct intel_uncore_forcewake_domain *d,
> +              const u32 ack,
> +              const u32 value)
> +{
> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
> +                              FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +

Double \n

> +static inline int
> +wait_ack_clear(const struct drm_i915_private *i915,
> +              const struct intel_uncore_forcewake_domain *d,
> +              const u32 ack)
> +{
> +       return __wait_for_ack(i915, d, ack, 0);
> +}
> +
> +static inline int
> +wait_ack_set(const struct drm_i915_private *i915,
> +            const struct intel_uncore_forcewake_domain *d,
> +            const u32 ack)
> +{
> +       return __wait_for_ack(i915, d, ack, ack);
> +}
> +
>  static inline void
>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>                          const struct intel_uncore_forcewake_domain *d)
>  {
> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -                            FORCEWAKE_KERNEL) == 0,
> -                           FORCEWAKE_ACK_TIMEOUT_MS))
> +       if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>                 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>                           intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
> +static int
> +wait_ack_with_fallback(const struct drm_i915_private *i915,
> +                      const struct intel_uncore_forcewake_domain *d,
> +                      const u32 ack,
> +                      const u32 value)
> +{
> +       int ret;
> +
> +       /*
> +        * There is a possibility of driver's wake request colliding
> +        * with hardware's own wake requests and that can cause
> +        * hardware to not deliver the driver's ack message.
> +        *
> +        * Use a fallback bit toggle to kick the gpu state machine
> +        * in hopes that the original ack will be delivered along with
> +        * the fallback ack.
> +        *
> +        * This workaround is described in HSDES #1604254524
> +        */
> +
> +       wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +       __raw_i915_write32(i915, d->reg_set,
> +                          _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
> +       wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);

My mind boggles when it comes to thinking how this interacts with a
second failure of the same type.  For that level of paranoia I think
you really do need to recurse onto the next fallback bit... (And then
unwind the acks all the way back.) If the bug happens twice, the
ack_clear following the clear is shortcircuited, so the subsequent
write+ack_set is ill-defined, and may not even be noticed.

Ok, I can see how this would work if the register was updated on edge
events, and one of those went missing. On the second ack, the refresh of
the register would see both bits set.

I think you want to insert a variable udelay (say 10*pass) between the
FALLBACK write + wait_ack_set(). I would also move the loops together to
make that pass known.

> +
> +       ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
> +
> +       __raw_i915_write32(i915, d->reg_set,
> +                          _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
> +
> +       return ret;
> +}
> +
> +enum ack_type {
> +       ACK_CLEAR = 0,
> +       ACK_SET
> +};
> +
> +static int
> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
> +                                const struct intel_uncore_forcewake_domain *d,
> +                                const enum ack_type type)
> +{
> +       int retry = 10;
> +       int ret;
> +
> +       do {
> +               ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
> +                                            type == ACK_SET ?
> +                                            FORCEWAKE_KERNEL : 0);

/me channeling Joonas

Stick that ternary into a local for readability.

> +       } while (ret && --retry);

Since this is the fallback path, we can afford a DRM_DEBUG_DRIVER here.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915: Fallback to reserve forcewake if primary ack missing (rev2)
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
                   ` (3 preceding siblings ...)
  2017-10-27 14:38 ` ✓ Fi.CI.BAT: success for drm/i915: Fallback to reserve forcewake if primary ack missing (rev2) Patchwork
@ 2017-10-27 16:07 ` Patchwork
  2017-10-30 12:27 ` [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
  2017-10-30 12:49 ` ✗ Fi.CI.BAT: warning for drm/i915: Fallback to reserve forcewake if primary ack missing (rev3) Patchwork
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2017-10-27 16:07 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fallback to reserve forcewake if primary ack missing (rev2)
URL   : https://patchwork.freedesktop.org/series/32694/
State : success

== Summary ==

shard-hsw        total:2539 pass:1434 dwarn:0   dfail:0   fail:8   skip:1097 time:9269s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6235/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
                   ` (4 preceding siblings ...)
  2017-10-27 16:07 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-10-30 12:27 ` Mika Kuoppala
  2017-10-30 13:08   ` Chris Wilson
  2017-10-30 12:49 ` ✗ Fi.CI.BAT: warning for drm/i915: Fallback to reserve forcewake if primary ack missing (rev3) Patchwork
  6 siblings, 1 reply; 13+ messages in thread
From: Mika Kuoppala @ 2017-10-30 12:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Rodrigo Vivi

There is a possibility on gen9 hardware to miss the forcewake ack
message. The recommended workaround is to use another free
bit and toggle it until original bit is successfully acknowledged.

Some future gen9 revs might or might not fix the underlying issue but
the fallback to reserve bit dance can be considered as harmless:
without the ack timeout we never reach the reserve bit forcewake.
Thus as of now we adopt a blanket approach for all gen9 and leave
the bypassing the reserve bit approach for future patches if
corresponding hw revisions do appear.

Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
for forcewake request/clear ack") did increase the forcewake timeout.
If the issue was a delayed ack, future work could include finding
a suitable timeout value both for primary ack and reserve toggle
to reduce the worst case latency.

v2: use bit 15, naming, comment (Chris), only wait fallback ack
v3: fix return on fallback, backoff after fallback write (Chris)

References: HSDES #1604254524
References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |   5 +-
 drivers/gpu/drm/i915/intel_uncore.c | 137 +++++++++++++++++++++++++++++++++---
 2 files changed, 130 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8c775e96b4e4..f0f8f6059652 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7774,8 +7774,9 @@ enum {
 #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
 #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
 #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
-#define   FORCEWAKE_KERNEL			0x1
-#define   FORCEWAKE_USER			0x2
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
 #define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 96ee6b2754be..5a2810774ebb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -69,17 +69,104 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 			       HRTIMER_MODE_REL);
 }
 
+static inline int
+__wait_for_ack(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack,
+	       const u32 value)
+{
+	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
+			       FORCEWAKE_ACK_TIMEOUT_MS);
+}
+
+static inline int
+wait_ack_clear(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, 0);
+}
+
+static inline int
+wait_ack_set(const struct drm_i915_private *i915,
+	     const struct intel_uncore_forcewake_domain *d,
+	     const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, ack);
+}
+
 static inline void
 fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
 			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
+enum ack_type {
+	ACK_CLEAR = 0,
+	ACK_SET
+};
+
+static int
+fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
+				 const struct intel_uncore_forcewake_domain *d,
+				 const enum ack_type type)
+{
+	const u32 ack_bit = FORCEWAKE_KERNEL;
+	const u32 value = type == ACK_SET ? ack_bit : 0;
+	unsigned int pass = 0;
+	bool ack_detected;
+
+	/*
+	 * There is a possibility of driver's wake request colliding
+	 * with hardware's own wake requests and that can cause
+	 * hardware to not deliver the driver's ack message.
+	 *
+	 * Use a fallback bit toggle to kick the gpu state machine
+	 * in hopes that the original ack will be delivered along with
+	 * the fallback ack.
+	 *
+	 * This workaround is described in HSDES #1604254524
+	 */
+
+	do {
+		wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+
+		__raw_i915_write32(i915, d->reg_set,
+				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
+		/* Give gt some time to relax before the polling frenzy */
+		udelay(10 * pass);
+		wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+
+		ack_detected = (__raw_i915_read32(i915, d->reg_ack) & ack_bit) == value;
+
+		__raw_i915_write32(i915, d->reg_set,
+				   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
+		pass++;
+	} while (!ack_detected && pass < 10);
+
+	DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n",
+			 intel_uncore_forcewake_domain_to_str(d->id),
+			 type == ACK_SET ? "set" : "clear",
+			 __raw_i915_read32(i915, d->reg_ack),
+			 pass);
+
+	return ack_detected ? 0 : -ETIMEDOUT;
+}
+
+static inline void
+fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915,
+				  const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR))
+		fw_domain_wait_ack_clear(i915, d);
+}
+
 static inline void
 fw_domain_get(struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
@@ -88,17 +175,26 @@ fw_domain_get(struct drm_i915_private *i915,
 }
 
 static inline void
-fw_domain_wait_ack(const struct drm_i915_private *i915,
-		   const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack_set(const struct drm_i915_private *i915,
+		       const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
 static inline void
+fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915,
+				const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET))
+		fw_domain_wait_ack_set(i915, d);
+}
+
+static inline void
 fw_domain_put(const struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
 {
@@ -119,7 +215,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 	}
 
 	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
-		fw_domain_wait_ack(i915, d);
+		fw_domain_wait_ack_set(i915, d);
+
+	i915->uncore.fw_domains_active |= fw_domains;
+}
+
+static void
+fw_domains_get_with_fallback(struct drm_i915_private *i915,
+			     enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
+
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear_fallback(i915, d);
+		fw_domain_get(i915, d);
+	}
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack_set_fallback(i915, d);
 
 	i915->uncore.fw_domains_active |= fw_domains;
 }
@@ -1142,7 +1258,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_fallback;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: warning for drm/i915: Fallback to reserve forcewake if primary ack missing (rev3)
  2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
                   ` (5 preceding siblings ...)
  2017-10-30 12:27 ` [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
@ 2017-10-30 12:49 ` Patchwork
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2017-10-30 12:49 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Fallback to reserve forcewake if primary ack missing (rev3)
URL   : https://patchwork.freedesktop.org/series/32694/
State : warning

== Summary ==

Series 32694v3 drm/i915: Fallback to reserve forcewake if primary ack missing
https://patchwork.freedesktop.org/api/1.0/series/32694/revisions/3/mbox/

Test gem_exec_nop:
        Subgroup basic-parallel:
                dmesg-fail -> PASS       (fi-glk-dsi) fdo#103514 +4
        Subgroup basic-series:
                skip       -> PASS       (fi-glk-dsi)
Test gem_exec_parallel:
        Subgroup basic:
                skip       -> PASS       (fi-glk-dsi)
Test gem_exec_reloc:
        Subgroup basic-cpu:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-gtt:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-cpu:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-read:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-read:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-cpu:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-gtt:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-read:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-gtt-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-cpu-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-read-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-read-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-cpu-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-gtt-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-read-noreloc:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-gtt-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-cpu-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-cpu-read-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-gtt-read-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-cpu-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-gtt-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-write-read-active:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-softpin:
                skip       -> PASS       (fi-glk-dsi)
Test gem_exec_store:
        Subgroup basic-all:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-blt:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-bsd:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-default:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-render:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-vebox:
                skip       -> PASS       (fi-glk-dsi)
Test gem_exec_suspend:
        Subgroup basic:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-s3:
                skip       -> PASS       (fi-glk-dsi)
        Subgroup basic-s4-devices:
                skip       -> PASS       (fi-glk-dsi)
Test gem_linear_blits:
        Subgroup basic:
                skip       -> PASS       (fi-glk-dsi)
Test gem_render_linear_blits:
        Subgroup basic:
                skip       -> PASS       (fi-glk-dsi)
Test gem_render_tiled_blits:
        Subgroup basic:
                skip       -> PASS       (fi-glk-dsi)
Test gem_ringfill:
        Subgroup basic-default:
                skip       -> PASS       (fi-glk-dsi)
WARNING: Long output truncated

13309d478b5f447643fe16666d62bc22e92b9171 drm-tip: 2017y-10m-30d-10h-04m-34s UTC integration manifest
e5791f94b4ff drm/i915: Fallback to reserve forcewake if primary ack missing

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6259/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-30 12:27 ` [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
@ 2017-10-30 13:08   ` Chris Wilson
  2017-11-02 11:27     ` Mika Kuoppala
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2017-10-30 13:08 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Rodrigo Vivi

Quoting Mika Kuoppala (2017-10-30 12:27:07)
> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
> 
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
> 
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
> 
> v2: use bit 15, naming, comment (Chris), only wait fallback ack
> v3: fix return on fallback, backoff after fallback write (Chris)
> 
> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> +static int
> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
> +                                const struct intel_uncore_forcewake_domain *d,
> +                                const enum ack_type type)
> +{
> +       const u32 ack_bit = FORCEWAKE_KERNEL;
> +       const u32 value = type == ACK_SET ? ack_bit : 0;
> +       unsigned int pass = 0;
> +       bool ack_detected;
> +
> +       /*
> +        * There is a possibility of driver's wake request colliding
> +        * with hardware's own wake requests and that can cause
> +        * hardware to not deliver the driver's ack message.
> +        *
> +        * Use a fallback bit toggle to kick the gpu state machine
> +        * in hopes that the original ack will be delivered along with
> +        * the fallback ack.

s/in hopes/in the hope/

> +        *
> +        * This workaround is described in HSDES #1604254524
> +        */
> +
> +       do {
> +               wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +
> +               __raw_i915_write32(i915, d->reg_set,
> +                                  _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
> +               /* Give gt some time to relax before the polling frenzy */
> +               udelay(10 * pass);
> +               wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);

I would have started from pass=1 (i.e. udelay(10)) as we already have a
0-delay for the primary wait_ack before we hit the fallback.

> +
> +               ack_detected = (__raw_i915_read32(i915, d->reg_ack) & ack_bit) == value;
> +
> +               __raw_i915_write32(i915, d->reg_set,
> +                                  _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
> +               pass++;
> +       } while (!ack_detected && pass < 10);

	unsigned int pass = 1;
	do {
		...
	} while (!ack_detected && pass++ < 10);

> +
> +       DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n",
> +                        intel_uncore_forcewake_domain_to_str(d->id),
> +                        type == ACK_SET ? "set" : "clear",
> +                        __raw_i915_read32(i915, d->reg_ack),
> +                        pass);
> +
> +       return ack_detected ? 0 : -ETIMEDOUT;
> +}

I was going to say a-b, but given the state machine we've deduced that
explains why this w/a has any chance of succeeding, I feel a

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

is justified. (If it's wrong, I'm equally culpable ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing
  2017-10-30 13:08   ` Chris Wilson
@ 2017-11-02 11:27     ` Mika Kuoppala
  0 siblings, 0 replies; 13+ messages in thread
From: Mika Kuoppala @ 2017-11-02 11:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Rodrigo Vivi

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2017-10-30 12:27:07)
>> There is a possibility on gen9 hardware to miss the forcewake ack
>> message. The recommended workaround is to use another free
>> bit and toggle it until original bit is successfully acknowledged.
>> 
>> Some future gen9 revs might or might not fix the underlying issue but
>> the fallback to reserve bit dance can be considered as harmless:
>> without the ack timeout we never reach the reserve bit forcewake.
>> Thus as of now we adopt a blanket approach for all gen9 and leave
>> the bypassing the reserve bit approach for future patches if
>> corresponding hw revisions do appear.
>> 
>> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
>> for forcewake request/clear ack") did increase the forcewake timeout.
>> If the issue was a delayed ack, future work could include finding
>> a suitable timeout value both for primary ack and reserve toggle
>> to reduce the worst case latency.
>> 
>> v2: use bit 15, naming, comment (Chris), only wait fallback ack
>> v3: fix return on fallback, backoff after fallback write (Chris)
>> 
>> References: HSDES #1604254524
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>> +static int
>> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
>> +                                const struct intel_uncore_forcewake_domain *d,
>> +                                const enum ack_type type)
>> +{
>> +       const u32 ack_bit = FORCEWAKE_KERNEL;
>> +       const u32 value = type == ACK_SET ? ack_bit : 0;
>> +       unsigned int pass = 0;
>> +       bool ack_detected;
>> +
>> +       /*
>> +        * There is a possibility of driver's wake request colliding
>> +        * with hardware's own wake requests and that can cause
>> +        * hardware to not deliver the driver's ack message.
>> +        *
>> +        * Use a fallback bit toggle to kick the gpu state machine
>> +        * in hopes that the original ack will be delivered along with
>> +        * the fallback ack.
>
> s/in hopes/in the hope/
>
>> +        *
>> +        * This workaround is described in HSDES #1604254524
>> +        */
>> +
>> +       do {
>> +               wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
>> +
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
>> +               /* Give gt some time to relax before the polling frenzy */
>> +               udelay(10 * pass);
>> +               wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
>
> I would have started from pass=1 (i.e. udelay(10)) as we already have a
> 0-delay for the primary wait_ack before we hit the fallback.
>
>> +
>> +               ack_detected = (__raw_i915_read32(i915, d->reg_ack) & ack_bit) == value;
>> +
>> +               __raw_i915_write32(i915, d->reg_set,
>> +                                  _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
>> +               pass++;
>> +       } while (!ack_detected && pass < 10);
>
> 	unsigned int pass = 1;
> 	do {
> 		...
> 	} while (!ack_detected && pass++ < 10);
>
>> +
>> +       DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n",
>> +                        intel_uncore_forcewake_domain_to_str(d->id),
>> +                        type == ACK_SET ? "set" : "clear",
>> +                        __raw_i915_read32(i915, d->reg_ack),
>> +                        pass);
>> +
>> +       return ack_detected ? 0 : -ETIMEDOUT;
>> +}
>
> I was going to say a-b, but given the state machine we've deduced that
> explains why this w/a has any chance of succeeding, I feel a
>

And it should be rather accurately maching the recommendation
of the hw engineers how to handle the issue.

There is a mention that this should be on top of delayed
ack reading of the primary fw request, which we don't have.

How I understand the issue is that without delaying the
primary ack we are more likely to hit the fallback
as the poll is interwined with the primary ack message
getting lost.

But as we now have the big hammer in place, we
can see how it goes and then tweak the delays and
timeouts if need arises.

Our timeouts are quite long and I have a suspicion
that the commit I am referring to in bxt case was
papering over of this issue.

> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks. Pushed.

> is justified. (If it's wrong, I'm equally culpable ;)

Wouldn't it be nice if we had hardware engineers to
stamp these kind of patches.

-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-11-02 11:28 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-26 14:01 [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
2017-10-26 14:24 ` ✗ Fi.CI.BAT: failure for " Patchwork
2017-10-26 14:34 ` [PATCH] " Chris Wilson
2017-10-27  6:34   ` Mika Kuoppala
2017-10-27 14:04 ` Mika Kuoppala
2017-10-27 14:08   ` Mika Kuoppala
2017-10-27 14:38   ` Chris Wilson
2017-10-27 14:38 ` ✓ Fi.CI.BAT: success for drm/i915: Fallback to reserve forcewake if primary ack missing (rev2) Patchwork
2017-10-27 16:07 ` ✓ Fi.CI.IGT: " Patchwork
2017-10-30 12:27 ` [PATCH] drm/i915: Fallback to reserve forcewake if primary ack missing Mika Kuoppala
2017-10-30 13:08   ` Chris Wilson
2017-11-02 11:27     ` Mika Kuoppala
2017-10-30 12:49 ` ✗ Fi.CI.BAT: warning for drm/i915: Fallback to reserve forcewake if primary ack missing (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.