All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/i915: Added write-enable pte bit support
@ 2014-02-11  8:49 akash.goel
  2014-04-24  9:44 ` Goel, Akash
  2014-05-13 22:05 ` Jesse Barnes
  0 siblings, 2 replies; 16+ messages in thread
From: akash.goel @ 2014-02-11  8:49 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

From: Akash Goel <akash.goel@intel.com>

This adds support for a write-enable bit in the entry of GTT.
This is handled via a read-only flag in the GEM buffer object which
is then used to see how to set the bit when writing the GTT entries.
Currently by default the Batch buffer & Ring buffers are marked as read only.

v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
    Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)

v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  6 ++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
 3 files changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 728b9c3..8dab62e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1699,6 +1699,12 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6e858e1..1e02d44 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -151,6 +151,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 
 #define BYT_PTE_WRITEABLE		(1 << 1)
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
+#define BYT_PTE_READ_ONLY		(1 << 31)
 
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
@@ -167,6 +168,10 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 	if (level != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 
+	/* Handle read-only request */
+	if (level & BYT_PTE_READ_ONLY)
+		pte &= ~BYT_PTE_WRITEABLE;
+
 	return pte;
 }
 
@@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		pt_vaddr[act_pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -999,6 +1005,10 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 	WARN_ON(flags);
 
+	if (IS_VALLEYVIEW(vma->vm->dev))
+		if (vma->obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
@@ -1336,6 +1346,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
+	if (IS_VALLEYVIEW(dev))
+		if (obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
 	 * changed, set the global PTEs.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d897a19..2fee914 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1354,6 +1354,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		goto err_hws;
 	}
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ring->obj = obj;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-02-11  8:49 [PATCH v3] drm/i915: Added write-enable pte bit support akash.goel
@ 2014-04-24  9:44 ` Goel, Akash
  2014-05-13 22:05 ` Jesse Barnes
  1 sibling, 0 replies; 16+ messages in thread
From: Goel, Akash @ 2014-04-24  9:44 UTC (permalink / raw)
  To: chris; +Cc: intel-gfx

Hi Chris,

Gentle reminder: please can you review this patch? I have addressed the review comments.

Best Regards
Akash

-----Original Message-----
From: Goel, Akash 
Sent: Tuesday, February 11, 2014 2:19 PM
To: intel-gfx@lists.freedesktop.org
Cc: Goel, Akash
Subject: [PATCH v3] drm/i915: Added write-enable pte bit support

From: Akash Goel <akash.goel@intel.com>

This adds support for a write-enable bit in the entry of GTT.
This is handled via a read-only flag in the GEM buffer object which is then used to see how to set the bit when writing the GTT entries.
Currently by default the Batch buffer & Ring buffers are marked as read only.

v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
    Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)

v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  6 ++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
 3 files changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 728b9c3..8dab62e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1699,6 +1699,12 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6e858e1..1e02d44 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -151,6 +151,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 
 #define BYT_PTE_WRITEABLE		(1 << 1)
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
+#define BYT_PTE_READ_ONLY		(1 << 31)
 
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
@@ -167,6 +168,10 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 	if (level != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 
+	/* Handle read-only request */
+	if (level & BYT_PTE_READ_ONLY)
+		pte &= ~BYT_PTE_WRITEABLE;
+
 	return pte;
 }
 
@@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		pt_vaddr[act_pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -999,6 +1005,10 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 	WARN_ON(flags);
 
+	if (IS_VALLEYVIEW(vma->vm->dev))
+		if (vma->obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);  }
 
@@ -1336,6 +1346,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
+	if (IS_VALLEYVIEW(dev))
+		if (obj->gt_ro)
+			cache_level |= BYT_PTE_READ_ONLY;
+
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
 	 * changed, set the global PTEs.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d897a19..2fee914 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1354,6 +1354,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		goto err_hws;
 	}
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ring->obj = obj;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
--
1.8.5.2

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-02-11  8:49 [PATCH v3] drm/i915: Added write-enable pte bit support akash.goel
  2014-04-24  9:44 ` Goel, Akash
@ 2014-05-13 22:05 ` Jesse Barnes
  2014-05-13 22:30   ` Daniel Vetter
  1 sibling, 1 reply; 16+ messages in thread
From: Jesse Barnes @ 2014-05-13 22:05 UTC (permalink / raw)
  To: akash.goel; +Cc: intel-gfx

On Tue, 11 Feb 2014 14:19:03 +0530
akash.goel@intel.com wrote:

> @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  		pt_vaddr[act_pte] =
>  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
>  				       cache_level, true);
> +
>  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
>  			kunmap_atomic(pt_vaddr);
>  			pt_vaddr = NULL;

Some extra whitespace here.

Otherwise:
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Might be good to expose this as a param too, so userspace could use it
for stuff that shouldn't be written...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-13 22:05 ` Jesse Barnes
@ 2014-05-13 22:30   ` Daniel Vetter
  2014-05-13 22:43     ` Jesse Barnes
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2014-05-13 22:30 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: akash.goel, intel-gfx

On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> On Tue, 11 Feb 2014 14:19:03 +0530
> akash.goel@intel.com wrote:
> 
> > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> >  		pt_vaddr[act_pte] =
> >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> >  				       cache_level, true);
> > +
> >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> >  			kunmap_atomic(pt_vaddr);
> >  			pt_vaddr = NULL;
> 
> Some extra whitespace here.
> 
> Otherwise:
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Hm, looking at the patch again encoding this into the cache_level enum is
fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
very likely. My old idea was to eventually add a pte_flags param all over
for this stuff with additional bits.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-13 22:30   ` Daniel Vetter
@ 2014-05-13 22:43     ` Jesse Barnes
  2014-05-14  8:14       ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Jesse Barnes @ 2014-05-13 22:43 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: akash.goel, intel-gfx

On Wed, 14 May 2014 00:30:34 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > On Tue, 11 Feb 2014 14:19:03 +0530
> > akash.goel@intel.com wrote:
> > 
> > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > >  		pt_vaddr[act_pte] =
> > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > >  				       cache_level, true);
> > > +
> > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > >  			kunmap_atomic(pt_vaddr);
> > >  			pt_vaddr = NULL;
> > 
> > Some extra whitespace here.
> > 
> > Otherwise:
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Hm, looking at the patch again encoding this into the cache_level enum is
> fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> very likely. My old idea was to eventually add a pte_flags param all over
> for this stuff with additional bits.

That works too; and yeah CHV is all different here isn't it.  But won't
it go through the gen8 paths anyway?

Ack on the cache_level abuse though; it's an implementation detail that
should be changed if/when this is properly exposed to userspace (or
maybe we shouldn't bother and just use CHV/BDW stuff going forward,
supporting a proper mprotect ioctl).

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-13 22:43     ` Jesse Barnes
@ 2014-05-14  8:14       ` Daniel Vetter
  2014-05-18  5:57         ` Akash Goel
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2014-05-14  8:14 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: akash.goel, intel-gfx

On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> On Wed, 14 May 2014 00:30:34 +0200
> Daniel Vetter <daniel@ffwll.ch> wrote:
> 
> > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > akash.goel@intel.com wrote:
> > > 
> > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > >  		pt_vaddr[act_pte] =
> > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > >  				       cache_level, true);
> > > > +
> > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > >  			kunmap_atomic(pt_vaddr);
> > > >  			pt_vaddr = NULL;
> > > 
> > > Some extra whitespace here.
> > > 
> > > Otherwise:
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > Hm, looking at the patch again encoding this into the cache_level enum is
> > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > very likely. My old idea was to eventually add a pte_flags param all over
> > for this stuff with additional bits.
> 
> That works too; and yeah CHV is all different here isn't it.  But won't
> it go through the gen8 paths anyway?

Yes, but we have a switch on the cache_level in the gen8 pte encode
function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
wreak havoc in that specific switch - we'll hit the default case when we
don't expect to.

cache_level = functional behaviour relevant for the kernel's clflushing
code

> Ack on the cache_level abuse though; it's an implementation detail that
> should be changed if/when this is properly exposed to userspace (or
> maybe we shouldn't bother and just use CHV/BDW stuff going forward,
> supporting a proper mprotect ioctl).

Not talking about exposing to userspace, but adding pte_flags to the
low-level platform pte encode functions to prevent the above bugs. I'm ok
with keeping obj->gt_ro.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-14  8:14       ` Daniel Vetter
@ 2014-05-18  5:57         ` Akash Goel
  2014-05-19  6:56           ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Akash Goel @ 2014-05-18  5:57 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > On Wed, 14 May 2014 00:30:34 +0200
> > Daniel Vetter <daniel@ffwll.ch> wrote:
> > 
> > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > akash.goel@intel.com wrote:
> > > > 
> > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > >  		pt_vaddr[act_pte] =
> > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > >  				       cache_level, true);
> > > > > +
> > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > >  			kunmap_atomic(pt_vaddr);
> > > > >  			pt_vaddr = NULL;
> > > > 
> > > > Some extra whitespace here.

Thanks, will remove this.

> > > > 
> > > > Otherwise:
> > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > 
> > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > very likely. My old idea was to eventually add a pte_flags param all over
> > > for this stuff with additional bits.
> > 
> > That works too; and yeah CHV is all different here isn't it.  But won't
> > it go through the gen8 paths anyway?
> 
> Yes, but we have a switch on the cache_level in the gen8 pte encode
> function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> wreak havoc in that specific switch - we'll hit the default case when we
> don't expect to.
> 
> cache_level = functional behaviour relevant for the kernel's clflushing
> code
> 

As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
the condition here to also include a 'GEN7' check (IS_GEN7)?

> > Ack on the cache_level abuse though; it's an implementation detail that
> > should be changed if/when this is properly exposed to userspace (or
> > maybe we shouldn't bother and just use CHV/BDW stuff going forward,
> > supporting a proper mprotect ioctl).
> 
> Not talking about exposing to userspace, but adding pte_flags to the
> low-level platform pte encode functions to prevent the above bugs. I'm ok
> with keeping obj->gt_ro.
> -Daniel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-18  5:57         ` Akash Goel
@ 2014-05-19  6:56           ` Daniel Vetter
  2014-05-19  7:33             ` Akash Goel
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2014-05-19  6:56 UTC (permalink / raw)
  To: Akash Goel; +Cc: intel-gfx

On Sun, May 18, 2014 at 11:27:00AM +0530, Akash Goel wrote:
> On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> > On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > > On Wed, 14 May 2014 00:30:34 +0200
> > > Daniel Vetter <daniel@ffwll.ch> wrote:
> > > 
> > > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > > akash.goel@intel.com wrote:
> > > > > 
> > > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > > >  		pt_vaddr[act_pte] =
> > > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > > >  				       cache_level, true);
> > > > > > +
> > > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > > >  			kunmap_atomic(pt_vaddr);
> > > > > >  			pt_vaddr = NULL;
> > > > > 
> > > > > Some extra whitespace here.
> 
> Thanks, will remove this.
> 
> > > > > 
> > > > > Otherwise:
> > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > 
> > > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > > very likely. My old idea was to eventually add a pte_flags param all over
> > > > for this stuff with additional bits.
> > > 
> > > That works too; and yeah CHV is all different here isn't it.  But won't
> > > it go through the gen8 paths anyway?
> > 
> > Yes, but we have a switch on the cache_level in the gen8 pte encode
> > function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> > wreak havoc in that specific switch - we'll hit the default case when we
> > don't expect to.
> > 
> > cache_level = functional behaviour relevant for the kernel's clflushing
> > code
> > 
> 
> As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
> the condition here, to include 'GEN7' also (IS_GEN7) in the check.

Adding new random bits to an enum which is used all over the place (and
not just in the pte encoding functions) makes the code much harder to
read. Also the code that deals with enum cache_level is all about cache
coherency, which has rather tricky logic.

Hence I expect this choice to cause further bugs down the road, which is
why I don't really like it. My apologies for not spotting this earlier.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-19  6:56           ` Daniel Vetter
@ 2014-05-19  7:33             ` Akash Goel
  2014-06-01  6:42               ` Akash Goel
  0 siblings, 1 reply; 16+ messages in thread
From: Akash Goel @ 2014-05-19  7:33 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, 2014-05-19 at 08:56 +0200, Daniel Vetter wrote:
> On Sun, May 18, 2014 at 11:27:00AM +0530, Akash Goel wrote:
> > On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> > > On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > > > On Wed, 14 May 2014 00:30:34 +0200
> > > > Daniel Vetter <daniel@ffwll.ch> wrote:
> > > > 
> > > > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > > > akash.goel@intel.com wrote:
> > > > > > 
> > > > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > > > >  		pt_vaddr[act_pte] =
> > > > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > > > >  				       cache_level, true);
> > > > > > > +
> > > > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > > > >  			kunmap_atomic(pt_vaddr);
> > > > > > >  			pt_vaddr = NULL;
> > > > > > 
> > > > > > Some extra whitespace here.
> > 
> > Thanks, will remove this.
> > 
> > > > > > 
> > > > > > Otherwise:
> > > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > 
> > > > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > > > very likely. My old idea was to eventually add a pte_flags param all over
> > > > > for this stuff with additional bits.
> > > > 
> > > > That works too; and yeah CHV is all different here isn't it.  But won't
> > > > it go through the gen8 paths anyway?
> > > 
> > > Yes, but we have a switch on the cache_level in the gen8 pte encode
> > > function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> > > wreak havoc in that specific switch - we'll hit the default case when we
> > > don't expect to.
> > > 
> > > cache_level = functional behaviour relevant for the kernel's clflushing
> > > code
> > > 
> > 
> > As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
> > the condition here, to include 'GEN7' also (IS_GEN7) in the check.
> 
> Adding new random bits to an enum which is used all over the place (and
> not just in the pte encoding functions) makes the code much harder to
> read. Also the code that deals with enum cache_level is all about cache
> coherency, which has rather tricky logic.
> 
> Hence I expect this choice to cause further bugs down the road, which is
> why I don't really like it. My apologies for not spotting this earlier.
> -Daniel

Hi Daniel,

I understand your concern, but actually (as of now) this bit is only VLV
specific. As per an earlier suggestion from Chris, I decided to
overload the cache_level enum itself, in lieu of adding a new parameter
to all the respective 'xxx_insert_entries' & 'xxx_pte_encode' functions.

And this is being done just before calling the 'xxx_insert_entries'
function, which simply passes the flag as it is to 'xxx_pte_encode'
function. So there may not be any risk here, if we use the appropriate
VLV check.

Best Regards
Akash

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-05-19  7:33             ` Akash Goel
@ 2014-06-01  6:42               ` Akash Goel
  2014-06-02  8:19                 ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Akash Goel @ 2014-06-01  6:42 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, 2014-05-19 at 13:03 +0530, Akash Goel wrote:
> On Mon, 2014-05-19 at 08:56 +0200, Daniel Vetter wrote:
> > On Sun, May 18, 2014 at 11:27:00AM +0530, Akash Goel wrote:
> > > On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> > > > On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > > > > On Wed, 14 May 2014 00:30:34 +0200
> > > > > Daniel Vetter <daniel@ffwll.ch> wrote:
> > > > > 
> > > > > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > > > > akash.goel@intel.com wrote:
> > > > > > > 
> > > > > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > > > > >  		pt_vaddr[act_pte] =
> > > > > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > > > > >  				       cache_level, true);
> > > > > > > > +
> > > > > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > > > > >  			kunmap_atomic(pt_vaddr);
> > > > > > > >  			pt_vaddr = NULL;
> > > > > > > 
> > > > > > > Some extra whitespace here.
> > > 
> > > Thanks, will remove this.
> > > 
> > > > > > > 
> > > > > > > Otherwise:
> > > > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > > 
> > > > > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > > > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > > > > very likely. My old idea was to eventually add a pte_flags param all over
> > > > > > for this stuff with additional bits.
> > > > > 
> > > > > That works too; and yeah CHV is all different here isn't it.  But won't
> > > > > it go through the gen8 paths anyway?
> > > > 
> > > > Yes, but we have a switch on the cache_level in the gen8 pte encode
> > > > function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> > > > wreak havoc in that specific switch - we'll hit the default case when we
> > > > don't expect to.
> > > > 
> > > > cache_level = functional behaviour relevant for the kernel's clflushing
> > > > code
> > > > 
> > > 
> > > As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
> > > the condition here, to include 'GEN7' also (IS_GEN7) in the check.
> > 
> > Adding new random bits to an enum which is used all over the place (and
> > not just in the pte encoding functions) makes the code much harder to
> > read. Also the code that deals with enum cache_level is all about cache
> > coherency, which has rather tricky logic.
> > 
> > Hence I expect this choice to cause further bugs down the road, which is
> > why I don't really like it. My apologies for not spotting this earlier.
> > -Daniel
> 
> Hi Daniel,
> 
> I understand your concern, but actually (as of now) this bit is only VLV
> specific. As per an earlier suggestion from Chris, I decided to
> overload the cache_level enum itself, in lieu of adding a new parameter
> to all the respective 'xxx_insert_entries' & 'xxx_pte_encode' functions.
> 
> And this is being done just before calling the 'xxx_insert_entries'
> function, which simply passes the flag as it is to 'xxx_pte_encode'
> function. So there may not be any risk here, if we use the appropriate
> VLV check.
> 

Hi Daniel,

Please can you provide your inputs here.

Best regards
Akash


> Best Regards
> Akash
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-06-01  6:42               ` Akash Goel
@ 2014-06-02  8:19                 ` Daniel Vetter
  2014-06-04  4:46                   ` Akash Goel
  2014-06-09  3:06                   ` [PATCH v4] " akash.goel
  0 siblings, 2 replies; 16+ messages in thread
From: Daniel Vetter @ 2014-06-02  8:19 UTC (permalink / raw)
  To: Akash Goel; +Cc: intel-gfx

On Sun, Jun 01, 2014 at 12:12:50PM +0530, Akash Goel wrote:
> On Mon, 2014-05-19 at 13:03 +0530, Akash Goel wrote:
> > On Mon, 2014-05-19 at 08:56 +0200, Daniel Vetter wrote:
> > > On Sun, May 18, 2014 at 11:27:00AM +0530, Akash Goel wrote:
> > > > On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> > > > > On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > > > > > On Wed, 14 May 2014 00:30:34 +0200
> > > > > > Daniel Vetter <daniel@ffwll.ch> wrote:
> > > > > > 
> > > > > > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > > > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > > > > > akash.goel@intel.com wrote:
> > > > > > > > 
> > > > > > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > > > > > >  		pt_vaddr[act_pte] =
> > > > > > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > > > > > >  				       cache_level, true);
> > > > > > > > > +
> > > > > > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > > > > > >  			kunmap_atomic(pt_vaddr);
> > > > > > > > >  			pt_vaddr = NULL;
> > > > > > > > 
> > > > > > > > Some extra whitespace here.
> > > > 
> > > > Thanks, will remove this.
> > > > 
> > > > > > > > 
> > > > > > > > Otherwise:
> > > > > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > > > 
> > > > > > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > > > > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > > > > > very likely. My old idea was to eventually add a pte_flags param all over
> > > > > > > for this stuff with additional bits.
> > > > > > 
> > > > > > That works too; and yeah CHV is all different here isn't it.  But won't
> > > > > > it go through the gen8 paths anyway?
> > > > > 
> > > > > Yes, but we have a switch on the cache_level in the gen8 pte encode
> > > > > function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> > > > > wreak havoc in that specific switch - we'll hit the default case when we
> > > > > don't expect to.
> > > > > 
> > > > > cache_level = functional behaviour relevant for the kernel's clflushing
> > > > > code
> > > > > 
> > > > 
> > > > As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
> > > > the condition here, to include 'GEN7' also (IS_GEN7) in the check.
> > > 
> > > Adding new random bits to an enum which is used all over the place (and
> > > not just in the pte encoding functions) makes the code much harder to
> > > read. Also the code that deals with enum cache_level is all about cache
> > > coherency, which has rather tricky logic.
> > > 
> > > Hence I expect this choice to cause further bugs down the road, which is
> > > why I don't really like it. My apologies for not spotting this earlier.
> > > -Daniel
> > 
> > Hi Daniel,
> > 
> > I understand your concern, but actually (as of now) this bit is only VLV
> > specific. As per an earlier suggestion from Chris, I decided to
> > overload the cache_level enum itself, in lieu of adding a new parameter
> > to all the respective 'xxx_insert_entries' & 'xxx_pte_encode' functions.
> > 
> > And this is being done just before calling the 'xxx_insert_entries'
> > function, which simply passes the flag as it is to 'xxx_pte_encode'
> > function. So there may not be any risk here, if we use the appropriate
> > VLV check.
> 
> Please can you provide your inputs here.

I guess you've run into a case where Chris&I disagree. I still think
wiring up a flags parameter to all the pte encode functions is the sane
option to pick here.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] drm/i915: Added write-enable pte bit support
  2014-06-02  8:19                 ` Daniel Vetter
@ 2014-06-04  4:46                   ` Akash Goel
  2014-06-09  3:06                   ` [PATCH v4] " akash.goel
  1 sibling, 0 replies; 16+ messages in thread
From: Akash Goel @ 2014-06-04  4:46 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, 2014-06-02 at 10:19 +0200, Daniel Vetter wrote:
> On Sun, Jun 01, 2014 at 12:12:50PM +0530, Akash Goel wrote:
> > On Mon, 2014-05-19 at 13:03 +0530, Akash Goel wrote:
> > > On Mon, 2014-05-19 at 08:56 +0200, Daniel Vetter wrote:
> > > > On Sun, May 18, 2014 at 11:27:00AM +0530, Akash Goel wrote:
> > > > > On Wed, 2014-05-14 at 10:14 +0200, Daniel Vetter wrote:
> > > > > > On Tue, May 13, 2014 at 03:43:12PM -0700, Jesse Barnes wrote:
> > > > > > > On Wed, 14 May 2014 00:30:34 +0200
> > > > > > > Daniel Vetter <daniel@ffwll.ch> wrote:
> > > > > > > 
> > > > > > > > On Tue, May 13, 2014 at 03:05:24PM -0700, Jesse Barnes wrote:
> > > > > > > > > On Tue, 11 Feb 2014 14:19:03 +0530
> > > > > > > > > akash.goel@intel.com wrote:
> > > > > > > > > 
> > > > > > > > > > @@ -810,6 +815,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> > > > > > > > > >  		pt_vaddr[act_pte] =
> > > > > > > > > >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> > > > > > > > > >  				       cache_level, true);
> > > > > > > > > > +
> > > > > > > > > >  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > > > > > > > > >  			kunmap_atomic(pt_vaddr);
> > > > > > > > > >  			pt_vaddr = NULL;
> > > > > > > > > 
> > > > > > > > > Some extra whitespace here.
> > > > > 
> > > > > Thanks, will remove this.
> > > > > 
> > > > > > > > > 
> > > > > > > > > Otherwise:
> > > > > > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > > > > > 
> > > > > > > > Hm, looking at the patch again encoding this into the cache_level enum is
> > > > > > > > fraught with fun. And due to IS_VLV aliasing chv this will blow up on chv
> > > > > > > > very likely. My old idea was to eventually add a pte_flags param all over
> > > > > > > > for this stuff with additional bits.
> > > > > > > 
> > > > > > > That works too; and yeah CHV is all different here isn't it.  But won't
> > > > > > > it go through the gen8 paths anyway?
> > > > > > 
> > > > > > Yes, but we have a switch on the cache_level in the gen8 pte encode
> > > > > > function. Since the bit31 gets or'ed in for VLV, it'll hit also chv and
> > > > > > wreak havoc in that specific switch - we'll hit the default case when we
> > > > > > don't expect to.
> > > > > > 
> > > > > > cache_level = functional behaviour relevant for the kernel's clflushing
> > > > > > code
> > > > > > 
> > > > > 
> > > > > As the 'IS_VALLEYVIEW' check will alias with CHV also, can I just update
> > > > > the condition here, to include 'GEN7' also (IS_GEN7) in the check.
> > > > 
> > > > Adding new random bits to an enum which is used all over the place (and
> > > > not just in the pte encoding functions) makes the code much harder to
> > > > read. Also the code that deals with enum cache_level is all about cache
> > > > coherency, which has rather tricky logic.
> > > > 
> > > > Hence I expect this choice to cause further bugs down the road, which is
> > > > why I don't really like it. My apologies for not spotting this earlier.
> > > > -Daniel
> > > 
> > > Hi Daniel,
> > > 
> > > I understand your concern, but actually (as of now) this bit is only VLV
> > > specific. As per an earlier suggestion from Chris, I decided to
> > > overload the cache_level enum itself, in lieu of adding a new parameter
> > > to all the respective 'xxx_insert_entries' & 'xxx_pte_encode' functions.
> > > 
> > > And this is being done just before calling the 'xxx_insert_entries'
> > > function, which simply passes the flag as it is to 'xxx_pte_encode'
> > > function. So there may not be any risk here, if we use the appropriate
> > > VLV check.
> > 
> > Please can you provide your inputs here.
> 
> I guess you've run into a case where Chris&I disagree. I still think
> wiring up a flags parameter to all the pte encode functions is the sane
> option to pick here.

Hi Daniel,

Yes, I just followed Chris's suggestion, which I also felt was
reasonable. But it's fine, I will implement it as per your suggestion.
I hope that will be acceptable to all.

Best regards
Akash

> -Daniel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4] drm/i915: Added write-enable pte bit support
  2014-06-02  8:19                 ` Daniel Vetter
  2014-06-04  4:46                   ` Akash Goel
@ 2014-06-09  3:06                   ` akash.goel
  2014-06-16 19:50                     ` Imre Deak
  1 sibling, 1 reply; 16+ messages in thread
From: akash.goel @ 2014-06-09  3:06 UTC (permalink / raw)
  To: intel-gfx; +Cc: sourab.gupta, Akash Goel

From: Akash Goel <akash.goel@intel.com>

This adds support for a write-enable bit in the entry of GTT.
This is handled via a read-only flag in the GEM buffer object which
is then used to see how to set the bit when writing the GTT entries.
Currently by default the Batch buffer & Ring buffers are marked as read only.

v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
    Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)

v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).

v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions,
    in lieu of overloading the cache_level enum (Daniel).

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  6 +++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 48 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  5 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
 4 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 506386e..dc18aee 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1658,6 +1658,12 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8b3cde7..221ea49 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -110,7 +110,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
 
 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -132,7 +132,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -156,7 +156,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 flags)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -164,7 +164,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 	/* Mark the page as writeable.  Other platforms don't have a
 	 * setting for read-only/writable, so this matches that behavior.
 	 */
-	pte |= BYT_PTE_WRITEABLE;
+	if (!(flags & PTE_READ_ONLY))
+		pte |= BYT_PTE_WRITEABLE;
 
 	if (level != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
@@ -174,7 +175,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
@@ -187,7 +188,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 				      enum i915_cache_level level,
-				      bool valid)
+				      bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
@@ -301,7 +302,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level, u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -639,7 +640,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	uint32_t pd_entry;
 	int pte, pde;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
 	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
@@ -941,7 +942,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -964,7 +965,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level, u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -981,7 +982,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 
 		pt_vaddr[act_pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
-				       cache_level, true);
+				       cache_level, true, flags);
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -1218,8 +1220,12 @@ ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
 {
+	if (IS_VALLEYVIEW(vma->vm->dev))
+		if (vma->obj->gt_ro)
+			flags |= PTE_READ_ONLY;
+
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
-				cache_level);
+				cache_level, flags);
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
@@ -1394,7 +1400,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level, u32 unused)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -1440,7 +1446,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level, u32 flags)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -1452,7 +1458,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
+		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
 	}
 
@@ -1464,7 +1470,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	if (i != 0)
 		WARN_ON(readl(&gtt_entries[i-1]) !=
-			vm->pte_encode(addr, level, true));
+			vm->pte_encode(addr, level, true, flags));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -1518,7 +1524,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
@@ -1567,6 +1573,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	if (IS_VALLEYVIEW(vma->vm->dev))
+		if (obj->gt_ro)
+			flags |= PTE_READ_ONLY;
+
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
 	 * changed, set the global PTEs.
@@ -1583,7 +1593,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		    (cache_level != obj->cache_level)) {
 			vma->vm->insert_entries(vma->vm, obj->pages,
 						vma->node.start,
-						cache_level);
+						cache_level, flags);
 			obj->has_global_gtt_mapping = 1;
 		}
 	}
@@ -1595,7 +1605,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		appgtt->base.insert_entries(&appgtt->base,
 					    vma->obj->pages,
 					    vma->node.start,
-					    cache_level);
+					    cache_level, flags);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 1b96a06..674eaf5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -197,15 +197,16 @@ struct i915_address_space {
 	/* FIXME: Need a more generic return type */
 	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid); /* Create a valid PTE */
+				     bool valid, u32 flags); /* Create a valid PTE */
 	void (*clear_range)(struct i915_address_space *vm,
 			    uint64_t start,
 			    uint64_t length,
 			    bool use_scratch);
+#define PTE_READ_ONLY (1<<2)
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
-			       enum i915_cache_level cache_level);
+			       enum i915_cache_level cache_level, u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
 };
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 279488a..ee99971 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring)
 	if (obj == NULL)
 		return -ENOMEM;
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
 	if (ret)
 		goto err_unref;
-- 
1.9.2

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4] drm/i915: Added write-enable pte bit support
  2014-06-09  3:06                   ` [PATCH v4] " akash.goel
@ 2014-06-16 19:50                     ` Imre Deak
  2014-06-17  5:29                       ` [PATCH v5] " akash.goel
  0 siblings, 1 reply; 16+ messages in thread
From: Imre Deak @ 2014-06-16 19:50 UTC (permalink / raw)
  To: akash.goel; +Cc: intel-gfx, sourab.gupta

Hi Akash,

On Mon, 2014-06-09 at 08:36 +0530, akash.goel@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
> 
> This adds support for a write-enable bit in the entry of GTT.
> This is handled via a read-only flag in the GEM buffer object which
> is then used to see how to set the bit when writing the GTT entries.
> Currently by default the Batch buffer & Ring buffers are marked as read only.
> 
> v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
>     Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)
> 
> v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).
> 
> v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions,
>     in lieu of overloading the cache_level enum (Daniel).
> 
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  6 +++++
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 48 ++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h     |  5 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
>  4 files changed, 41 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 506386e..dc18aee 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1658,6 +1658,12 @@ struct drm_i915_gem_object {
>  	unsigned int pin_display:1;
>  
>  	/*
> +	 * Is the object to be mapped as read-only to the GPU
> +	 * Only honoured if hardware has relevant pte bit
> +	 */
> +	unsigned long gt_ro:1;
> +
> +	/*
>  	 * Is the GPU currently using a fence to access this buffer,
>  	 */
>  	unsigned int pending_fenced_gpu_access:1;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8b3cde7..221ea49 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -110,7 +110,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
>  
>  static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -132,7 +132,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -156,7 +156,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 flags)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
> @@ -164,7 +164,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  	/* Mark the page as writeable.  Other platforms don't have a
>  	 * setting for read-only/writable, so this matches that behavior.
>  	 */
> -	pte |= BYT_PTE_WRITEABLE;
> +	if (!(flags & PTE_READ_ONLY))
> +		pte |= BYT_PTE_WRITEABLE;
>  
>  	if (level != I915_CACHE_NONE)
>  		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
> @@ -174,7 +175,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid)
> +				     bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
> @@ -187,7 +188,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
>  
>  static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>  				      enum i915_cache_level level,
> -				      bool valid)
> +				      bool valid, u32 unused)
>  {
>  	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
> @@ -301,7 +302,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> -				      enum i915_cache_level cache_level)
> +				      enum i915_cache_level cache_level, u32 unused)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> @@ -639,7 +640,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  	uint32_t pd_entry;
>  	int pte, pde;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
>  
>  	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
>  		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
> @@ -941,7 +942,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
>  	unsigned last_pte, i;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
>  
>  	while (num_entries) {
>  		last_pte = first_pte + num_entries;
> @@ -964,7 +965,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> -				      enum i915_cache_level cache_level)
> +				      enum i915_cache_level cache_level, u32 flags)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> @@ -981,7 +982,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  
>  		pt_vaddr[act_pte] =
>  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> -				       cache_level, true);
> +				       cache_level, true, flags);
> +
>  		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
>  			kunmap_atomic(pt_vaddr);
>  			pt_vaddr = NULL;
> @@ -1218,8 +1220,12 @@ ppgtt_bind_vma(struct i915_vma *vma,
>  	       enum i915_cache_level cache_level,
>  	       u32 flags)
>  {
> +	if (IS_VALLEYVIEW(vma->vm->dev))

The above check is redundant, since with the VLV/BYT-specific
pte_encode() we'll already get the right behavior.

> +		if (vma->obj->gt_ro)
> +			flags |= PTE_READ_ONLY;
> +
>  	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
> -				cache_level);
> +				cache_level, flags);
>  }
>  
>  static void ppgtt_unbind_vma(struct i915_vma *vma)
> @@ -1394,7 +1400,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
>  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
>  				     uint64_t start,
> -				     enum i915_cache_level level)
> +				     enum i915_cache_level level, u32 unused)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
>  	unsigned first_entry = start >> PAGE_SHIFT;
> @@ -1440,7 +1446,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
>  				     uint64_t start,
> -				     enum i915_cache_level level)
> +				     enum i915_cache_level level, u32 flags)
>  {
>  	struct drm_i915_private *dev_priv = vm->dev->dev_private;
>  	unsigned first_entry = start >> PAGE_SHIFT;
> @@ -1452,7 +1458,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  
>  	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
>  		addr = sg_page_iter_dma_address(&sg_iter);
> -		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
> +		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
>  		i++;
>  	}
>  
> @@ -1464,7 +1470,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	 */
>  	if (i != 0)
>  		WARN_ON(readl(&gtt_entries[i-1]) !=
> -			vm->pte_encode(addr, level, true));
> +			vm->pte_encode(addr, level, true, flags));
>  
>  	/* This next bit makes the above posting read even more important. We
>  	 * want to flush the TLBs only after we're certain all the PTE updates
> @@ -1518,7 +1524,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  		 first_entry, num_entries, max_entries))
>  		num_entries = max_entries;
>  
> -	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
> +	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
>  
>  	for (i = 0; i < num_entries; i++)
>  		iowrite32(scratch_pte, &gtt_base[i]);
> @@ -1567,6 +1573,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> +	if (IS_VALLEYVIEW(vma->vm->dev))

The check is redundant as above.

> +		if (obj->gt_ro)
> +			flags |= PTE_READ_ONLY;
> +
>  	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
>  	 * or we have a global mapping already but the cacheability flags have
>  	 * changed, set the global PTEs.
> @@ -1583,7 +1593,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  		    (cache_level != obj->cache_level)) {
>  			vma->vm->insert_entries(vma->vm, obj->pages,
>  						vma->node.start,
> -						cache_level);
> +						cache_level, flags);
>  			obj->has_global_gtt_mapping = 1;
>  		}
>  	}
> @@ -1595,7 +1605,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
>  		appgtt->base.insert_entries(&appgtt->base,
>  					    vma->obj->pages,
>  					    vma->node.start,
> -					    cache_level);
> +					    cache_level, flags);
>  		vma->obj->has_aliasing_ppgtt_mapping = 1;
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 1b96a06..674eaf5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -197,15 +197,16 @@ struct i915_address_space {
>  	/* FIXME: Need a more generic return type */
>  	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
>  				     enum i915_cache_level level,
> -				     bool valid); /* Create a valid PTE */
> +				     bool valid, u32 flags); /* Create a valid PTE */
>  	void (*clear_range)(struct i915_address_space *vm,
>  			    uint64_t start,
>  			    uint64_t length,
>  			    bool use_scratch);
> +#define PTE_READ_ONLY (1<<2)

As this uses the same namespace as GLOBAL_BIND, I would keep their
definitions together. Also I couldn't find any other flag besides
GLOBAL_BIND, in which case this could be (1 << 1).

With the above fixed the patch looks good to me:
Reviewed-by: Imre Deak <imre.deak@intel.com>

>  	void (*insert_entries)(struct i915_address_space *vm,
>  			       struct sg_table *st,
>  			       uint64_t start,
> -			       enum i915_cache_level cache_level);
> +			       enum i915_cache_level cache_level, u32 flags);
>  	void (*cleanup)(struct i915_address_space *vm);
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 279488a..ee99971 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring)
>  	if (obj == NULL)
>  		return -ENOMEM;
>  
> +	/* mark ring buffers as read-only from GPU side by default */
> +	obj->gt_ro = 1;
> +
>  	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret)
>  		goto err_unref;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v5] drm/i915: Added write-enable pte bit support
  2014-06-16 19:50                     ` Imre Deak
@ 2014-06-17  5:29                       ` akash.goel
  2014-06-17  7:22                         ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: akash.goel @ 2014-06-17  5:29 UTC (permalink / raw)
  To: intel-gfx; +Cc: sourab.gupta, Akash Goel

From: Akash Goel <akash.goel@intel.com>

This adds support for a write-enable bit in the entry of GTT.
This is handled via a read-only flag in the GEM buffer object which
is then used to see how to set the bit when writing the GTT entries.
Currently by default the Batch buffer & Ring buffers are marked as read only.

v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
    Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)

v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).

v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions,
    in lieu of overloading the cache_level enum (Daniel).

v5: Removed the superfluous VLV check & changed the definition location of PTE_READ_ONLY flag (Imre)

Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  6 +++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 48 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  5 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 +++
 4 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64d520f..7e4c272 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1661,6 +1661,12 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7f226fa..a4153ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -116,7 +116,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
 
 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -138,7 +138,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -162,7 +162,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 flags)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -170,7 +170,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 	/* Mark the page as writeable.  Other platforms don't have a
 	 * setting for read-only/writable, so this matches that behavior.
 	 */
-	pte |= BYT_PTE_WRITEABLE;
+	if (!(flags & PTE_READ_ONLY))
+		pte |= BYT_PTE_WRITEABLE;
 
 	if (level != I915_CACHE_NONE)
 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
@@ -180,7 +181,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid)
+				     bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
@@ -193,7 +194,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 
 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 				      enum i915_cache_level level,
-				      bool valid)
+				      bool valid, u32 unused)
 {
 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
@@ -307,7 +308,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level, u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -645,7 +646,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	uint32_t pd_entry;
 	int pte, pde;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
 	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
@@ -947,7 +948,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -970,7 +971,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level, u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -987,7 +988,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 
 		pt_vaddr[act_pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
-				       cache_level, true);
+				       cache_level, true, flags);
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -1224,8 +1226,12 @@ ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
 {
+	/* Currently applicable only to VLV */
+	if (vma->obj->gt_ro)
+		flags |= PTE_READ_ONLY;
+
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
-				cache_level);
+				cache_level, flags);
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
@@ -1400,7 +1406,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level, u32 unused)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -1446,7 +1452,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level, u32 flags)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -1458,7 +1464,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
+		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
 	}
 
@@ -1470,7 +1476,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	if (i != 0)
 		WARN_ON(readl(&gtt_entries[i-1]) !=
-			vm->pte_encode(addr, level, true));
+			vm->pte_encode(addr, level, true, flags));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -1524,7 +1530,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
@@ -1573,6 +1579,10 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	/* Currently applicable only to VLV */
+	if (obj->gt_ro)
+		flags |= PTE_READ_ONLY;
+
 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
 	 * or we have a global mapping already but the cacheability flags have
 	 * changed, set the global PTEs.
@@ -1589,7 +1599,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		    (cache_level != obj->cache_level)) {
 			vma->vm->insert_entries(vma->vm, obj->pages,
 						vma->node.start,
-						cache_level);
+						cache_level, flags);
 			obj->has_global_gtt_mapping = 1;
 		}
 	}
@@ -1601,7 +1611,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		appgtt->base.insert_entries(&appgtt->base,
 					    vma->obj->pages,
 					    vma->node.start,
-					    cache_level);
+					    cache_level, flags);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 1b96a06..8d6f7c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -154,6 +154,7 @@ struct i915_vma {
 	void (*unbind_vma)(struct i915_vma *vma);
 	/* Map an object into an address space with the given cache flags. */
 #define GLOBAL_BIND (1<<0)
+#define PTE_READ_ONLY (1<<1)
 	void (*bind_vma)(struct i915_vma *vma,
 			 enum i915_cache_level cache_level,
 			 u32 flags);
@@ -197,7 +198,7 @@ struct i915_address_space {
 	/* FIXME: Need a more generic return type */
 	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
 				     enum i915_cache_level level,
-				     bool valid); /* Create a valid PTE */
+				     bool valid, u32 flags); /* Create a valid PTE */
 	void (*clear_range)(struct i915_address_space *vm,
 			    uint64_t start,
 			    uint64_t length,
@@ -205,7 +206,7 @@ struct i915_address_space {
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
-			       enum i915_cache_level cache_level);
+			       enum i915_cache_level cache_level, u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
 };
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0eaaaec..b96edaf 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring)
 	if (obj == NULL)
 		return -ENOMEM;
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
 	if (ret)
 		goto err_unref;
-- 
1.9.2

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v5] drm/i915: Added write-enable pte bit support
  2014-06-17  5:29                       ` [PATCH v5] " akash.goel
@ 2014-06-17  7:22                         ` Daniel Vetter
  0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2014-06-17  7:22 UTC (permalink / raw)
  To: akash.goel; +Cc: intel-gfx, sourab.gupta

On Tue, Jun 17, 2014 at 10:59:42AM +0530, akash.goel@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
> 
> This adds support for a write-enable bit in the entry of GTT.
> This is handled via a read-only flag in the GEM buffer object which
> is then used to see how to set the bit when writing the GTT entries.
> Currently by default the Batch buffer & Ring buffers are marked as read only.
> 
> v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris)
>     Fixed the issue of leaving 'gt_old_ro' as unused. (Chris)
> 
> v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel).
> 
> v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions,
>     in lieu of overloading the cache_level enum (Daniel).
> 
> v5: Removed the superfluous VLV check & changed the definition location of PTE_READ_ONLY flag (Imre)
> 
> Reviewed-by: Imre Deak <imre.deak@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2014-06-17  7:22 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-11  8:49 [PATCH v3] drm/i915: Added write-enable pte bit support akash.goel
2014-04-24  9:44 ` Goel, Akash
2014-05-13 22:05 ` Jesse Barnes
2014-05-13 22:30   ` Daniel Vetter
2014-05-13 22:43     ` Jesse Barnes
2014-05-14  8:14       ` Daniel Vetter
2014-05-18  5:57         ` Akash Goel
2014-05-19  6:56           ` Daniel Vetter
2014-05-19  7:33             ` Akash Goel
2014-06-01  6:42               ` Akash Goel
2014-06-02  8:19                 ` Daniel Vetter
2014-06-04  4:46                   ` Akash Goel
2014-06-09  3:06                   ` [PATCH v4] " akash.goel
2014-06-16 19:50                     ` Imre Deak
2014-06-17  5:29                       ` [PATCH v5] " akash.goel
2014-06-17  7:22                         ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.