* [PATCH] drm/i915/vlv: Added write-enable pte bit support
@ 2014-01-09 12:24 akash.goel
  2014-02-06 10:22 ` Goel, Akash
  2014-02-06 21:41 ` Eric Anholt
  0 siblings, 2 replies; 10+ messages in thread
From: akash.goel @ 2014-01-09 12:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel

From: Akash Goel <akash.goel@intel.com>

This adds support for using the write-enable bit in the GTT entry for VLV.
This is handled via a read-only flag in the GEM buffer object which
is then used to check if the write-enable bit has to be set or not
when writing the GTT entries.
Currently by default only the Batch buffer & Ring buffers are being marked
as read only.

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 10 ++++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 45 ++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  3 ++
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8afff..a3ab8a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -620,7 +620,8 @@ struct i915_address_space {
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       unsigned int first_entry,
-			       enum i915_cache_level cache_level);
+			       enum i915_cache_level cache_level,
+			       bool gt_ro);
 	void (*cleanup)(struct i915_address_space *vm);
 };
 
@@ -1671,6 +1672,13 @@ struct drm_i915_gem_object {
 	unsigned int pin_display:1;
 
 	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned long gt_ro:1;
+	unsigned long gt_old_ro:1;
+
+	/*
 	 * Is the GPU currently using a fence to access this buffer,
 	 */
 	unsigned int pending_fenced_gpu_access:1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bbff8f9..3a15aec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -164,6 +164,14 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		list_add_tail(&vma->exec_list, &eb->vmas);
 		list_del_init(&obj->obj_exec_link);
 
+		/*
+		 * Currently mark each buffer as r/w by default.
+		 * If we are changing gt_ro, we need to make sure that it
+		 * gets re-mapped on gtt to update the entries.
+		 */
+		obj->gt_old_ro = obj->gt_ro;
+		obj->gt_ro = 0;
+
 		vma->exec_entry = &exec[i];
 		if (eb->and < 0) {
 			eb->lut[i] = vma;
@@ -1153,6 +1161,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* take note of the batch buffer before we might reorder the lists */
 	batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
 
+	/* Mark exec buffers as read-only from GPU side by default */
+	batch_obj->gt_ro = 1;
+
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 998f9a0..d87add4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -290,7 +290,8 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level,
+				      bool unused)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -792,11 +793,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
-				      enum i915_cache_level cache_level)
+				      enum i915_cache_level cache_level,
+				      bool gt_ro)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
+	gen6_gtt_pte_t pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	struct sg_page_iter sg_iter;
@@ -806,7 +809,16 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		dma_addr_t page_addr;
 
 		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true);
+		pte  = vm->pte_encode(page_addr, cache_level, true);
+		if (IS_VALLEYVIEW(vm->dev)) {
+			/* Handle read-only request */
+			if (gt_ro)
+				pte &= ~BYT_PTE_WRITEABLE;
+			else
+				pte |= BYT_PTE_WRITEABLE;
+		}
+		pt_vaddr[act_pte] = pte;
+
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			act_pt++;
@@ -996,7 +1008,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 	WARN_ON(flags);
 
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
+	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level, vma->obj->gt_ro);
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
@@ -1167,7 +1179,8 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level,
+				     bool unused)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen8_gtt_pte_t __iomem *gtt_entries =
@@ -1214,18 +1227,29 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
-				     enum i915_cache_level level)
+				     enum i915_cache_level level,
+				     bool gt_ro)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+	gen6_gtt_pte_t pte;
 	int i = 0;
 	struct sg_page_iter sg_iter;
 	dma_addr_t addr;
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
+		pte  = vm->pte_encode(addr, level, true);
+		if (IS_VALLEYVIEW(vm->dev)) {
+			/* Handle read-only request */
+			if (gt_ro)
+				pte &= ~BYT_PTE_WRITEABLE;
+			else
+				pte |= BYT_PTE_WRITEABLE;
+		}
+		iowrite32(pte, &gtt_entries[i]);
+
 		i++;
 	}
 
@@ -1236,8 +1260,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) !=
-			vm->pte_encode(addr, level, true));
+		WARN_ON(readl(&gtt_entries[i-1]) != pte);
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -1350,7 +1373,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 		if (!obj->has_global_gtt_mapping ||
 		    (cache_level != obj->cache_level)) {
 			vma->vm->insert_entries(vma->vm, obj->pages, entry,
-						cache_level);
+						cache_level, obj->gt_ro);
 			obj->has_global_gtt_mapping = 1;
 		}
 	}
@@ -1360,7 +1383,7 @@ static void ggtt_bind_vma(struct i915_vma *vma,
 	     (cache_level != obj->cache_level))) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
-					    vma->obj->pages, entry, cache_level);
+					    vma->obj->pages, entry, cache_level, obj->gt_ro);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 442c9a6..d257bb3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1352,6 +1352,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		goto err_hws;
 	}
 
+	/* mark ring buffers as read-only from GPU side by default */
+	obj->gt_ro = 1;
+
 	ring->obj = obj;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-01-09 12:24 [PATCH] drm/i915/vlv: Added write-enable pte bit support akash.goel
@ 2014-02-06 10:22 ` Goel, Akash
  2014-02-06 11:56   ` Chris Wilson
  2014-02-06 21:41 ` Eric Anholt
  1 sibling, 1 reply; 10+ messages in thread
From: Goel, Akash @ 2014-02-06 10:22 UTC (permalink / raw)
  To: intel-gfx

Please kindly review this patch.

Best regards
Akash

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-06 10:22 ` Goel, Akash
@ 2014-02-06 11:56   ` Chris Wilson
  2014-02-06 17:36     ` Daniel Vetter
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2014-02-06 11:56 UTC (permalink / raw)
  To: Goel, Akash; +Cc: intel-gfx

On Thu, Feb 06, 2014 at 10:22:28AM +0000, Goel, Akash wrote:
> Please kindly review this patch.
> 
> Best regards
> Akash
> -----Original Message-----
> From: Goel, Akash 
> Sent: Thursday, January 09, 2014 5:55 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Goel, Akash
> Subject: [PATCH] drm/i915/vlv: Added write-enable pte bit support
> 
> From: Akash Goel <akash.goel@intel.com>
> 
> This adds support for using the write-enable bit in the GTT entry for VLV.
> This is handled via a read-only flag in the GEM buffer object which is then used to check if the write-enable bit has to be set or not when writing the GTT entries.
> Currently by default only the Batch buffer & Ring buffers are being marked as read only.

Don't cause us to rewrite the PTE for the batch buffer between each
execbuffer (ro for the batch, rw next time it gets used as a texture).
In fact, do not change ro without user intervention.

gt_old_ro is unused

Use byt_pte_encode() instead of hacking the result of
ppgtt->pte_encode().

Consider expanding i915_cache_level so that it included the concept of
PROT_READ | PROT_WRITE.
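
For illustration, roughly what that could look like: a sketch against the
i915_gem_gtt.c of this era, not code from this patch or the review, assuming
byt_pte_encode() grows a writeable parameter so callers never mask
BYT_PTE_WRITEABLE by hand:

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, bool writeable)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;

	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Set the write-enable bit only when the caller asks for it */
	if (writeable)
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

The IS_VALLEYVIEW() special case then disappears from the PTE write loops,
and the insert_entries paths pass the protection straight through
vm->pte_encode().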
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-06 11:56   ` Chris Wilson
@ 2014-02-06 17:36     ` Daniel Vetter
  2014-02-07  5:44       ` Goel, Akash
  0 siblings, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2014-02-06 17:36 UTC (permalink / raw)
  To: Chris Wilson, Goel, Akash, intel-gfx

On Thu, Feb 06, 2014 at 11:56:37AM +0000, Chris Wilson wrote:
> On Thu, Feb 06, 2014 at 10:22:28AM +0000, Goel, Akash wrote:
> > Please kindly review this patch.
> > 
> > Best regards
> > Akash
> > -----Original Message-----
> > From: Goel, Akash 
> > Sent: Thursday, January 09, 2014 5:55 PM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Goel, Akash
> > Subject: [PATCH] drm/i915/vlv: Added write-enable pte bit support
> > 
> > From: Akash Goel <akash.goel@intel.com>
> > 
> > This adds support for using the write-enable bit in the GTT entry for VLV.
> > This is handled via a read-only flag in the GEM buffer object which is then used to check if the write-enable bit has to be set or not when writing the GTT entries.
> > Currently by default only the Batch buffer & Ring buffers are being marked as read only.
> 
> Don't cause us to rewrite the PTE for the batch buffer between each
> execbuffer (ro for the batch, rw next time it gets used as a texture).
> In fact, do not change ro without user intervention.
> 
> gt_old_ro is unused
> 
> Use byt_pte_encode() instead of hacking the result of
> ppgtt->pte_encode().
> 
> Consider expanding i915_cache_level so that it included the concept of
> PROT_READ | PROT_WRITE.

Also, what's the exact use-case for this here? And if we need to expose
this to userspace, then it needs a testcase.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-01-09 12:24 [PATCH] drm/i915/vlv: Added write-enable pte bit support akash.goel
  2014-02-06 10:22 ` Goel, Akash
@ 2014-02-06 21:41 ` Eric Anholt
  2014-02-07  9:39   ` Goel, Akash
  1 sibling, 1 reply; 10+ messages in thread
From: Eric Anholt @ 2014-02-06 21:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Akash Goel


akash.goel@intel.com writes:

> From: Akash Goel <akash.goel@intel.com>
>
> This adds support for using the write-enable bit in the GTT entry for VLV.
> This is handled via a read-only flag in the GEM buffer object which
> is then used to check if the write-enable bit has to be set or not
> when writing the GTT entries.
> Currently by default only the Batch buffer & Ring buffers are being marked
> as read only.

What happens when we GTT-mapped write a batchbuffer that had previously
been silently made RO by the kernel?  Does the CPU take a fault?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-06 17:36     ` Daniel Vetter
@ 2014-02-07  5:44       ` Goel, Akash
  2014-02-09 10:34         ` Daniel Vetter
  0 siblings, 1 reply; 10+ messages in thread
From: Goel, Akash @ 2014-02-07  5:44 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson, intel-gfx

> Don't cause us to rewrite the PTE for the batch buffer between each execbuffer (ro for the batch, rw next time it gets used as a texture).
> In fact, do not change ro without user intervention.
> gt_old_ro is unused
Sorry, we didn't consider the scenario where a batch buffer could subsequently be reused as a different type of buffer, such as a texture. But would the remap have a significant overhead?

>> Use byt_pte_encode() instead of hacking the result of ppgtt->pte_encode().
>> Consider expanding i915_cache_level so that it included the concept of PROT_READ | PROT_WRITE.
Thanks, we actually thought of doing it the same way initially. Can we overload the i915_cache_level enum with the RO flag when calling 'insert_entries' on the VALLEYVIEW platform?
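
Something like this, perhaps (a sketch only; PTE_READ_ONLY is an
illustrative name, not an existing define):

#define PTE_READ_ONLY	(1 << 0)

	void (*insert_entries)(struct i915_address_space *vm,
			       struct sg_table *st,
			       unsigned int first_entry,
			       enum i915_cache_level cache_level,
			       u32 flags); /* e.g. PTE_READ_ONLY */

pte_encode() implementations on hardware without a read-only bit would
simply ignore the flag.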

>> Also, what's the exact use-case for this here? And if we need to expose this to userspace, then it needs a testcase.
>> -Daniel
Since this RO bit is at our disposal on the BYT platform, we thought we could use it to give the buffers extra protection.
We initially used it only for the ring and batch buffers because we know, without any input from userspace, that they are supposed to be read-only from the GPU side.
Extending this to other buffers needs a libdrm-level change.
Alternatively, we could use the read/write domain information in the relocation entries to decide this internally on the driver side, but that would require some changes in the execbuffer path, as sketched below.
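
For illustration, deriving it from the relocations could look roughly like
this (execbuffer_has_write_reloc_to() is a made-up helper that would walk
each buffer's drm_i915_gem_relocation_entry array, checking for a non-zero
write_domain naming the object):

	/* GPU-read-only unless some relocation marks obj as a write target */
	obj->gt_ro = !execbuffer_has_write_reloc_to(eb, obj);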

Best Regards
Akash


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-06 21:41 ` Eric Anholt
@ 2014-02-07  9:39   ` Goel, Akash
  2014-02-08 20:13     ` Eric Anholt
  0 siblings, 1 reply; 10+ messages in thread
From: Goel, Akash @ 2014-02-07  9:39 UTC (permalink / raw)
  To: Eric Anholt, intel-gfx; +Cc: Sharma, Ankitprasad R

>> What happens when we GTT-mapped write a batchbuffer that had previously been silently made RO by the kernel?  Does the CPU take a fault?
We tested this particular case by doing the relocation inside the batch buffer, which is mapped read-only in the GTT, from the execbuffer path.
No faults were observed on the CPU side.
 
Also, we don't see any ring hangs when forcing the GPU to write to buffers marked as RO; the writes are simply rejected.
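
For reference, the access pattern we tested (a CPU write through the GTT
mapping) looks roughly like this from userspace with libdrm; a sketch,
error handling omitted, with MI_BATCH_BUFFER_END defined locally for
completeness:

#include <stdint.h>
#include <intel_bufmgr.h>

#define MI_BATCH_BUFFER_END	(0xA << 23)

static void poke_batch_via_gtt(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "batch", 4096, 4096);

	drm_intel_gem_bo_map_gtt(bo);	/* CPU mapping through the GTT aperture */
	((uint32_t *)bo->virtual)[0] = MI_BATCH_BUFFER_END;
	drm_intel_gem_bo_unmap_gtt(bo);
	drm_intel_bo_unreference(bo);
}

The RO bit only gates GPU accesses, which is consistent with the absence of
CPU faults above.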
 
Best Regards
Akash

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-07  9:39   ` Goel, Akash
@ 2014-02-08 20:13     ` Eric Anholt
  0 siblings, 0 replies; 10+ messages in thread
From: Eric Anholt @ 2014-02-08 20:13 UTC (permalink / raw)
  To: Goel, Akash, intel-gfx; +Cc: Sharma, Ankitprasad R


"Goel, Akash" <akash.goel@intel.com> writes:

> >> What happens when we GTT-mapped write a batchbuffer that had
> >> previously been silently made RO by the kernel?  Does the CPU take a
> >> fault?
> We tested this particular case by doing the relocation inside the batch
> buffer, which is mapped read-only in the GTT, from the execbuffer path.
> No faults were observed on the CPU side.
>
> Also, we don't see any ring hangs when forcing the GPU to write to
> buffers marked as RO; the writes are simply rejected.

Interesting.  Good to know.

So far we've never done self-modifying batches.  If we could do it, we
had some interesting ideas for handling some classes of state updates,
but I think at least at one point the kernel was banning self-modifying
batches.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-07  5:44       ` Goel, Akash
@ 2014-02-09 10:34         ` Daniel Vetter
  2014-02-10 14:45           ` Goel, Akash
  0 siblings, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2014-02-09 10:34 UTC (permalink / raw)
  To: Goel, Akash; +Cc: intel-gfx

Aside: your quoting is a bit strange. It looks like part of Chris's mail is
indented differently from other paragraphs of the same mail, which in turn
match the indent of my reply. And the indent levels are wrong: my reply
has >> but Chris's has only one >. Rather confusing.

I suggest you switch to a saner mail client ;-)

On Fri, Feb 07, 2014 at 05:44:00AM +0000, Goel, Akash wrote:
> > Don't cause us to rewrite the PTE for the batch buffer between each
> > execbuffer (ro for the batch, rw next time it gets used as a texture).
> > In fact, do not change ro without user intervention.
> > gt_old_ro is unused
> Sorry, we didn't consider the scenario where a batch buffer could
> subsequently be reused as a different type of buffer, such as a texture.
> But would the remap have a significant overhead?
> 
> >> Use byt_pte_encode() instead of hacking the result of ppgtt->pte_encode().
> >> Consider expanding i915_cache_level so that it included the concept of PROT_READ | PROT_WRITE.
> Thanks, we actually thought of doing it the same way initially. Can we
> overload the i915_cache_level enum with the RO flag when calling
> 'insert_entries' on the VALLEYVIEW platform?
> 
> >> Also, what's the exact use-case for this here? And if we need to expose this to userspace, then it needs a testcase.
> >> -Daniel
> Since this RO bit is at our disposal on the BYT platform, we thought we
> could use it to give the buffers extra protection.
> We initially used it only for the ring and batch buffers because we know,
> without any input from userspace, that they are supposed to be read-only
> from the GPU side.
> Extending this to other buffers needs a libdrm-level change.
> Alternatively, we could use the read/write domain information in the
> relocation entries to decide this internally on the driver side, but that
> would require some changes in the execbuffer path.

I think a start would be to roll the r/o bit out just for ringbuffers.
With that we can then implement the byt_pte_encode stuff and related
infrastructure.

Once we have that we can use it in more places internally, e.g. the
command scanner could copy scanned batches to r/o gem buffers. With that
use-case we'll never have a conflict which requires switching to r/w
access again since the batch buffer scanner gem bos will be on a separate
list.
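
As a rough illustration (copy_and_scan_batch() and exec_shadow() are
made-up names here; only gt_ro comes from this patch):

	struct drm_i915_gem_object *shadow;

	shadow = i915_gem_alloc_object(dev, batch_obj->base.size);
	if (shadow && copy_and_scan_batch(shadow, batch_obj) == 0) {
		shadow->gt_ro = 1;	/* the GPU only ever reads the shadow */
		exec_shadow(ring, shadow);
	}

Since the shadow bo stays on a kernel-internal list and is never handed
back to userspace, its PTEs never need to flip back to r/w.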

Once we've knocked down the easy security improvements this allows, we can
look at any benefits for userspace-allocated gem buffers. But since the
gpu doesn't fault it's not useful as a debug feature. And for better
isolation we really want ppgtt (which would resolve all these issues even
without r/o bits).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915/vlv: Added write-enable pte bit support
  2014-02-09 10:34         ` Daniel Vetter
@ 2014-02-10 14:45           ` Goel, Akash
  0 siblings, 0 replies; 10+ messages in thread
From: Goel, Akash @ 2014-02-10 14:45 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

Daniel,

>> I think a start would be to roll the r/o bit out just for ringbuffers.
>> With that we can then implement the byt_pte_encode stuff and related infrastructure.

Thanks for the review. I will modify the patch to use the ro protection only for the ring buffer.

Best Regards
Akash


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-02-10 14:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-09 12:24 [PATCH] drm/i915/vlv: Added write-enable pte bit support akash.goel
2014-02-06 10:22 ` Goel, Akash
2014-02-06 11:56   ` Chris Wilson
2014-02-06 17:36     ` Daniel Vetter
2014-02-07  5:44       ` Goel, Akash
2014-02-09 10:34         ` Daniel Vetter
2014-02-10 14:45           ` Goel, Akash
2014-02-06 21:41 ` Eric Anholt
2014-02-07  9:39   ` Goel, Akash
2014-02-08 20:13     ` Eric Anholt
