From: oscar.mateo@intel.com
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 37/53] drm/i915/bdw: Implement context switching (somewhat)
Date: Fri, 13 Jun 2014 16:37:55 +0100
Message-ID: <1402673891-14618-38-git-send-email-oscar.mateo@intel.com>
In-Reply-To: <1402673891-14618-1-git-send-email-oscar.mateo@intel.com>
References: <1402673891-14618-1-git-send-email-oscar.mateo@intel.com>
List-Id: intel-gfx@lists.freedesktop.org

From: Ben Widawsky

A context switch occurs by submitting a context descriptor to the
ExecList Submission Port (ELSP). Given that we can now initialize a
context, it's possible to begin implementing the context switch by
creating the descriptor and submitting it to the ELSP (actually two
descriptors, since the ELSP has two ports).

The context object must be mapped in the GGTT, which means it must
exist in the 0-4GB graphics VA range.

Signed-off-by: Ben Widawsky

v2: This code has changed quite a lot in various rebases. Of particular
importance is that now we use the globally unique Submission ID to send
to the hardware. Also, context pages are now pinned unconditionally to
GGTT, so there is no need to bind them.

v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW
context ID we submit to the ELSP is globally unique and != 0 (Bspec
requirements on the software use-only bits of the Context ID in the
Context Descriptor Format) without the hassle of the previous
submission ID construction. Also, re-add the ELSP posting read (it was
dropped somewhere during the rebases).

v4:
- Squash with "drm/i915/bdw: Add forcewake lock around ELSP writes"
  (Bspec says: "SW must set Force Wakeup bit to prevent GT from entering
  C6 while ELSP writes are in progress") as noted by Thomas Daniel
  (thomas.daniel@intel.com).
- Rename functions and use an execlists/intel_execlists_ namespace.
- The BUG_ON only checked that the LRCA was below 32 bits, but it didn't
  make sure that it was properly aligned. Spotted by Alistair Mcaulay.

Signed-off-by: Oscar Mateo
---
 drivers/gpu/drm/i915/intel_lrc.c | 112 ++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 2 files changed, 112 insertions(+), 1 deletion(-)
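Reviewer note (placed here between the diffstat and the diff, where
git-am ignores it): the descriptor layout built by
execlists_ctx_descriptor() below can be exercised in isolation. What
follows is a minimal standalone sketch, not part of the patch; the LRCA
value is a made-up example, and the bit definitions simply mirror the
ones this patch adds. It assembles a descriptor and prints the two
dwords in the per-context order that execlists_elsp_write() uses.

#include <stdio.h>
#include <stdint.h>

#define GEN8_CTX_VALID		(1 << 0)
#define GEN8_CTX_L3LLC_COHERENT	(1 << 5)
#define GEN8_CTX_PRIVILEGE	(1 << 8)
#define GEN8_CTX_MODE_SHIFT	3
#define GEN8_CTX_ID_SHIFT	32
#define LEGACY_CONTEXT		1

int main(void)
{
	/* Made-up example GGTT offset of a context object. It must be
	 * 4K aligned and live below 4GB, hence the BUG_ON mask in the
	 * patch (0xFFFFFFFF00000FFF). */
	uint64_t lrca = 0x00012000;
	uint32_t ctx_id = lrca >> 12;	/* LRCA[31:12] -> hwCtxId[19:0] */
	uint64_t desc;

	desc = GEN8_CTX_VALID;
	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
	desc |= GEN8_CTX_L3LLC_COHERENT;
	desc |= GEN8_CTX_PRIVILEGE;
	desc |= lrca;
	desc |= (uint64_t)ctx_id << GEN8_CTX_ID_SHIFT;

	/* These two halves are what gets pushed to the ELSP for each
	 * context: upper dword first, then lower dword. */
	printf("descriptor: 0x%016llx (high 0x%08x, low 0x%08x)\n",
	       (unsigned long long)desc,
	       (unsigned)(desc >> 32), (unsigned)desc);
	return 0;
}

Note how the non-zero, globally unique hwCtxId falls out of the 4K
alignment requirement on the LRCA; that is what the v3 note above
relies on to drop the old submission ID construction.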
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c9a5e00..4e8268c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -47,6 +47,7 @@
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
 #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
+#define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
 #define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
 
 #define CTX_LRI_HEADER_0 0x01
@@ -78,6 +79,26 @@
 #define CTX_R_PWR_CLK_STATE 0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
 
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+enum {
+	ADVANCED_CONTEXT = 0,
+	LEGACY_CONTEXT,
+	ADVANCED_AD_CONTEXT,
+	LEGACY_64B_CONTEXT
+};
+#define GEN8_CTX_MODE_SHIFT 3
+enum {
+	FAULT_AND_HANG = 0,
+	FAULT_AND_HALT, /* Debug only */
+	FAULT_AND_STREAM,
+	FAULT_AND_CONTINUE /* Unsupported */
+};
+#define GEN8_CTX_ID_SHIFT 32
+
 bool intel_enable_execlists(struct drm_device *dev)
 {
 	if (!i915.enable_execlists)
@@ -86,6 +107,94 @@ bool intel_enable_execlists(struct drm_device *dev)
 	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
 }
 
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+{
+	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+
+	/* LRCA is required to be 4K aligned so the more significant 20 bits
+	 * are globally unique */
+	return lrca >> 12;
+}
+
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+{
+	uint64_t desc;
+	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+	BUG_ON(lrca & 0xFFFFFFFF00000FFFULL);
+
+	desc = GEN8_CTX_VALID;
+	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	desc |= GEN8_CTX_L3LLC_COHERENT;
+	desc |= GEN8_CTX_PRIVILEGE;
+	desc |= lrca;
+	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers */
+	/* desc |= GEN8_CTX_FORCE_RESTORE; */
+
+	return desc;
+}
+
+static void execlists_elsp_write(struct intel_engine_cs *ring,
+				 struct drm_i915_gem_object *ctx_obj0,
+				 struct drm_i915_gem_object *ctx_obj1)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	uint64_t temp = 0;
+	uint32_t desc[4];
+
+	/* XXX: You must always write both descriptors in the order below. */
+	if (ctx_obj1)
+		temp = execlists_ctx_descriptor(ctx_obj1);
+	else
+		temp = 0;
+	desc[1] = (u32)(temp >> 32);
+	desc[0] = (u32)temp;
+
+	temp = execlists_ctx_descriptor(ctx_obj0);
+	desc[3] = (u32)(temp >> 32);
+	desc[2] = (u32)temp;
+
+	/* Set Force Wakeup bit to prevent GT from entering C6 while
+	 * ELSP writes are in progress */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(RING_ELSP(ring), desc[1]);
+	I915_WRITE(RING_ELSP(ring), desc[0]);
+	I915_WRITE(RING_ELSP(ring), desc[3]);
+	/* The context is automatically loaded after the following */
+	I915_WRITE(RING_ELSP(ring), desc[2]);
+
+	/* ELSP is a write only register, so this serves as a posting read */
+	POSTING_READ(RING_EXECLIST_STATUS(ring));
+
+	/* Release Force Wakeup */
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static int execlists_submit_context(struct intel_engine_cs *ring,
+				    struct intel_context *to0, u32 tail0,
+				    struct intel_context *to1, u32 tail1)
+{
+	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj1 = NULL;
+
+	ctx_obj0 = to0->engine[ring->id].obj;
+	BUG_ON(!ctx_obj0);
+	BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+
+	if (to1) {
+		ctx_obj1 = to1->engine[ring->id].obj;
+		BUG_ON(!ctx_obj1);
+		BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+	}
+
+	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+
+	return 0;
+}
+
 static inline struct intel_ringbuffer *
 logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx)
 {
@@ -763,7 +872,8 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 static void gen8_submit_ctx(struct intel_engine_cs *ring,
 			    struct intel_context *ctx, u32 value)
 {
-	DRM_ERROR("Execlists still not ready!\n");
+	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
+	execlists_submit_context(ring, ctx, value, NULL, 0);
 }
 
 static int gen8_emit_request(struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 0cb7cb5..eeb90ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -41,6 +41,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
 
 /* Execlists */
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 bool intel_enable_execlists(struct drm_device *dev);
 
 #endif /* _INTEL_LRC_H_ */
-- 
1.9.0