intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] gtt abstractions
@ 2013-01-24 15:50 Daniel Vetter
  2013-01-24 15:50 ` [PATCH 1/4] drm/i915: vfuncs for gtt_clear_range/insert_entries Daniel Vetter
                   ` (6 more replies)
  0 siblings, 7 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-24 15:50 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

Hi all,

Whith Ben wrestling the gtt code and trying to finally wear us off the intel-gtt
fix I've figured I might as well drop my idea for how we could better organize
the pte writing/clearing.

With these patches there's no a clean cut between the code which manages the
address spaces and the code which writes (global/pp) gtt ptes. Which should
nicely pave the way for cool things to happen, like real per-process address
spaces.

Comments highly welcome.

Cheers, Daniel

Daniel Vetter (4):
  drm/i915: vfuncs for gtt_clear_range/insert_entries
  drm/i915: vfuncs for ppgtt
  drm/i915: pte_encode is gen6+
  drm/i915: extract hw ppgtt setup/cleanup code

 drivers/gpu/drm/i915/i915_drv.h     |  32 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 310 ++++++++++++++++++++----------------
 2 files changed, 200 insertions(+), 142 deletions(-)

-- 
1.7.11.7

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/4] drm/i915: vfuncs for gtt_clear_range/insert_entries
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
@ 2013-01-24 15:50 ` Daniel Vetter
  2013-01-24 15:50 ` [PATCH 2/4] drm/i915: vfuncs for ppgtt Daniel Vetter
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-24 15:50 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

We have a few too many differences between platforms here, so finally
take the prepared abstraction and run with it. A few smaller changes
are required to get things into shape:

- Move i915_cache_level up since we need it in the gt funcs.
- Split up i915_ggtt_clear_range and move the two functions down to
  where the relevant insert_entries functions are.
- Adjust a few function parameter lists.

Now we have 2 functions which deal with the gen6+ global gtt
(gen6_ggtt_ prefix) and 2 functions which deal with the legacy gtt
code in the intel-gtt.c fake agp driver (i915_ggtt_ prefix).

Init is still a bit a mess, but honestly I don't care about that.

One thing I've thought about while deciding on the exact interfaces is
a flag parameter for ->clear_range: We could use that to decide
between writing invalid pte entries or scratch pte entries. In case we
ever get around to fixing all our bugs which currently prevent us from
filling the gtt with empty ptes for the truly unused ranges ...

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  21 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c | 121 ++++++++++++++++++++----------------
 2 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a798ee..5e224af 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -308,9 +308,24 @@ struct drm_i915_display_funcs {
 	/* pll clock increase/decrease */
 };
 
+enum i915_cache_level {
+	I915_CACHE_NONE = 0,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+};
+
 struct drm_i915_gt_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv);
+
+	/* global gtt ops */
+	void (*gtt_clear_range)(struct drm_device *dev,
+				unsigned int first_entry,
+				unsigned int num_entries);
+	void (*gtt_insert_entries)(struct drm_device *dev,
+				   struct sg_table *st,
+				   unsigned int pg_start,
+				   enum i915_cache_level cache_level);
 };
 
 #define DEV_INFO_FLAGS \
@@ -1025,12 +1040,6 @@ enum hdmi_force_audio {
 	HDMI_AUDIO_ON,			/* force turn on HDMI audio */
 };
 
-enum i915_cache_level {
-	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC,
-	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
-};
-
 #define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
 
 struct drm_i915_gem_object_ops {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a0ba4a9..1698d63 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -367,40 +367,14 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
 		dev_priv->mm.interruptible = interruptible;
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
-				 unsigned first_entry,
-				 unsigned num_entries)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	gtt_pte_t scratch_pte;
-	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
-	int i;
-
-	if (INTEL_INFO(dev)->gen < 6) {
-		intel_gtt_clear_range(first_entry, num_entries);
-		return;
-	}
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-	readl(gtt_base);
-}
-
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	i915_ggtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-			      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
+				     dev_priv->gtt.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
@@ -429,15 +403,13 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
-				  enum i915_cache_level level)
+static void gen6_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int first_entry,
+				     enum i915_cache_level level)
 {
-	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct sg_table *st = obj->pages;
 	struct scatterlist *sg = st->sgl;
-	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
 	gtt_pte_t __iomem *gtt_entries =
 		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int unused, i = 0;
@@ -453,9 +425,6 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	BUG_ON(i > max_entries);
-	BUG_ON(i != obj->base.size / PAGE_SIZE);
-
 	/* XXX: This serves as a posting read to make sure that the PTE has
 	 * actually been updated. There is some concern that even though
 	 * registers and PTEs are within the same BAR that they are potentially
@@ -473,28 +442,69 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
+static void gen6_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	gtt_pte_t scratch_pte;
+	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+	readl(gtt_base);
+}
+
+
+static void i915_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int pg_start,
+				     enum i915_cache_level cache_level)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_sg_entries(st, pg_start, flags);
+
+}
+
+static void i915_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	intel_gtt_clear_range(first_entry, num_entries);
+}
+
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	if (INTEL_INFO(dev)->gen < 6) {
-		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-		intel_gtt_insert_sg_entries(obj->pages,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    flags);
-	} else {
-		gen6_ggtt_bind_object(obj, cache_level);
-	}
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gt.gtt_insert_entries(dev, obj->pages,
+					obj->gtt_space->start >> PAGE_SHIFT,
+					cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
-	i915_ggtt_clear_range(obj->base.dev,
-			      obj->gtt_space->start >> PAGE_SHIFT,
-			      obj->base.size >> PAGE_SHIFT);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gt.gtt_clear_range(obj->base.dev,
+				     obj->gtt_space->start >> PAGE_SHIFT,
+				     obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -570,13 +580,12 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 			     hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		i915_ggtt_clear_range(dev,
-				      hole_start / PAGE_SIZE,
-				      (hole_end-hole_start) / PAGE_SIZE);
+		dev_priv->gt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
+					     (hole_end-hole_start) / PAGE_SIZE);
 	}
 
 	/* And finally clear the reserved guard page */
-	i915_ggtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	dev_priv->gt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -718,6 +727,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);
 
+		dev_priv->gt.gtt_clear_range = i915_ggtt_clear_range;
+		dev_priv->gt.gtt_insert_entries = i915_ggtt_insert_entries;
+
 		return 0;
 	}
 
@@ -771,6 +783,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
 
+	dev_priv->gt.gtt_clear_range = gen6_ggtt_clear_range;
+	dev_priv->gt.gtt_insert_entries = gen6_ggtt_insert_entries;
+
 	return 0;
 
 err_out:
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/4] drm/i915: vfuncs for ppgtt
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
  2013-01-24 15:50 ` [PATCH 1/4] drm/i915: vfuncs for gtt_clear_range/insert_entries Daniel Vetter
@ 2013-01-24 15:50 ` Daniel Vetter
  2013-01-24 15:50 ` [PATCH 3/4] drm/i915: pte_encode is gen6+ Daniel Vetter
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-24 15:50 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

Like for the global gtt we want a notch more flexibility here. Only
big change (besides a few tiny function parameter adjustments) was to
move gen6_ppgtt_insert_entries up (and remove _sg_ from its name, we
only have one kind of insert_entries since the last gtt cleanup).

A follow-up patch will also extract the init/cleanup code a bit.

With this we have the hw details of pte writing nicely hidden away
behind a bit of abstraction. Which should pave the way for
different/multiple ppgtts (e.g. what we need for real ppgtt support).

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  10 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 111 ++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5e224af..4316ede 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -414,6 +414,15 @@ struct i915_hw_ppgtt {
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 	dma_addr_t scratch_page_dma_addr;
+
+	/* pte functions, mirroring the interface of the global gtt. */
+	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
+			       struct sg_table *st,
+			       unsigned int pg_start,
+			       enum i915_cache_level cache_level);
 };
 
 
@@ -1664,7 +1673,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
 /* i915_gem_gtt.c */
-int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1698d63..250d150 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -77,7 +77,7 @@ static inline gtt_pte_t pte_encode(struct drm_device *dev,
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
-static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
@@ -108,7 +108,51 @@ static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+				      struct sg_table *pages,
+				      unsigned first_entry,
+				      enum i915_cache_level cache_level)
+{
+	gtt_pte_t *pt_vaddr;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned i, j, m, segment_len;
+	dma_addr_t page_addr;
+	struct scatterlist *sg;
+
+	/* init sg walking */
+	sg = pages->sgl;
+	i = 0;
+	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+	m = 0;
+
+	while (i < pages->nents) {
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
+			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
+						 cache_level);
+
+			/* grab the next page */
+			if (++m == segment_len) {
+				if (++i == pages->nents)
+					break;
+
+				sg = sg_next(sg);
+				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+				m = 0;
+			}
+		}
+
+		kunmap_atomic(pt_vaddr);
+
+		first_pte = 0;
+		act_pd++;
+	}
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt;
@@ -127,6 +171,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
+	ppgtt->clear_range = gen6_ppgtt_clear_range;
+	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -159,8 +205,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
 
-	i915_ppgtt_clear_range(ppgtt, 0,
-			       ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->clear_range(ppgtt, 0,
+			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
@@ -209,66 +255,21 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	kfree(ppgtt);
 }
 
-static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
-					 const struct sg_table *pages,
-					 unsigned first_entry,
-					 enum i915_cache_level cache_level)
-{
-	gtt_pte_t *pt_vaddr;
-	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned i, j, m, segment_len;
-	dma_addr_t page_addr;
-	struct scatterlist *sg;
-
-	/* init sg walking */
-	sg = pages->sgl;
-	i = 0;
-	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-	m = 0;
-
-	while (i < pages->nents) {
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
-			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
-
-			/* grab the next page */
-			if (++m == segment_len) {
-				if (++i == pages->nents)
-					break;
-
-				sg = sg_next(sg);
-				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-				m = 0;
-			}
-		}
-
-		kunmap_atomic(pt_vaddr);
-
-		first_pte = 0;
-		act_pd++;
-	}
-}
-
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	i915_ppgtt_insert_sg_entries(ppgtt,
-				     obj->pages,
-				     obj->gtt_space->start >> PAGE_SHIFT,
-				     cache_level);
+	ppgtt->insert_entries(ppgtt, obj->pages,
+			      obj->gtt_space->start >> PAGE_SHIFT,
+			      cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	i915_ppgtt_clear_range(ppgtt,
-			       obj->gtt_space->start >> PAGE_SHIFT,
-			       obj->base.size >> PAGE_SHIFT);
+	ppgtt->clear_range(ppgtt,
+			   obj->gtt_space->start >> PAGE_SHIFT,
+			   obj->base.size >> PAGE_SHIFT);
 }
 
 void i915_gem_init_ppgtt(struct drm_device *dev)
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/4] drm/i915: pte_encode is gen6+
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
  2013-01-24 15:50 ` [PATCH 1/4] drm/i915: vfuncs for gtt_clear_range/insert_entries Daniel Vetter
  2013-01-24 15:50 ` [PATCH 2/4] drm/i915: vfuncs for ppgtt Daniel Vetter
@ 2013-01-24 15:50 ` Daniel Vetter
  2013-01-24 15:50 ` [PATCH 4/4] drm/i915: extract hw ppgtt setup/cleanup code Daniel Vetter
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-24 15:50 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

All the other gen6+ hw code has the gen6_ prefix, so be consistent
about it.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 250d150..5ee6bcc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -44,9 +44,9 @@ typedef uint32_t gtt_pte_t;
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
-static inline gtt_pte_t pte_encode(struct drm_device *dev,
-				   dma_addr_t addr,
-				   enum i915_cache_level level)
+static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
+					dma_addr_t addr,
+					enum i915_cache_level level)
 {
 	gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -87,8 +87,9 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
-				 I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(ppgtt->dev,
+				      ppgtt->scratch_page_dma_addr,
+				      I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -131,8 +132,8 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
 			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
+			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
+						      cache_level);
 
 			/* grab the next page */
 			if (++m == segment_len) {
@@ -421,7 +422,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (m = 0; m < len; m++) {
 			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
+			iowrite32(gen6_pte_encode(dev, addr, level),
+				  &gtt_entries[i]);
 			i++;
 		}
 	}
@@ -433,7 +435,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));
+		WARN_ON(readl(&gtt_entries[i-1])
+			!= gen6_pte_encode(dev, addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -458,7 +461,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
+				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/4] drm/i915: extract hw ppgtt setup/cleanup code
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
                   ` (2 preceding siblings ...)
  2013-01-24 15:50 ` [PATCH 3/4] drm/i915: pte_encode is gen6+ Daniel Vetter
@ 2013-01-24 15:50 ` Daniel Vetter
  2013-01-24 19:41 ` [PATCH 0/4] gtt abstractions Ben Widawsky
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-24 15:50 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

At the moment only cosmetics, but being able to initialize/cleanup
arbitrary ppgtt address spaces paves the way to have more than one of
them ... Just in case we ever get around to implementing real
per-process address spaces.

Note that in that case another vfunc for ppgtt initialization would be
beneficial though. But that can wait until the code grows a second
place which initializes ppgtts. Anyway, with this patch the ppgtt code
is now nicely separate from anything about the aliasing ppgtt code, so
we only need to add tracking for additional address-spaces with
drm_mm. And a bit of smarts to execbuf to ensure buffer objects are
mapped into the right ppgtt space.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4316ede..42aa0e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -423,6 +423,7 @@ struct i915_hw_ppgtt {
 			       struct sg_table *st,
 			       unsigned int pg_start,
 			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 };
 
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5ee6bcc..faff274 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -153,10 +153,28 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 {
+	int i;
+
+	if (ppgtt->pt_dma_addr) {
+		for (i = 0; i < ppgtt->num_pd_entries; i++)
+			pci_unmap_page(ppgtt->dev->pdev,
+				       ppgtt->pt_dma_addr[i],
+				       4096, PCI_DMA_BIDIRECTIONAL);
+	}
+
+	kfree(ppgtt->pt_dma_addr);
+	for (i = 0; i < ppgtt->num_pd_entries; i++)
+		__free_page(ppgtt->pt_pages[i]);
+	kfree(ppgtt->pt_pages);
+	kfree(ppgtt);
+}
+
+static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
 	int ret = -ENOMEM;
@@ -166,18 +184,14 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	 * now. */
 	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
 
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ret;
-
-	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
 	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->cleanup = gen6_ppgtt_cleanup;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
-		goto err_ppgtt;
+		return -ENOMEM;
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
@@ -211,8 +225,6 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
-	dev_priv->mm.aliasing_ppgtt = ppgtt;
-
 	return 0;
 
 err_pd_pin:
@@ -228,8 +240,27 @@ err_pt_alloc:
 			__free_page(ppgtt->pt_pages[i]);
 	}
 	kfree(ppgtt->pt_pages);
-err_ppgtt:
-	kfree(ppgtt);
+
+	return ret;
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return -ENOMEM;
+
+	ppgtt->dev = dev;
+
+	ret = gen6_ppgtt_init(ppgtt);
+	if (ret)
+		kfree(ppgtt);
+	else
+		dev_priv->mm.aliasing_ppgtt = ppgtt;
 
 	return ret;
 }
@@ -238,22 +269,11 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i;
 
 	if (!ppgtt)
 		return;
 
-	if (ppgtt->pt_dma_addr) {
-		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
-				       4096, PCI_DMA_BIDIRECTIONAL);
-	}
-
-	kfree(ppgtt->pt_dma_addr);
-	for (i = 0; i < ppgtt->num_pd_entries; i++)
-		__free_page(ppgtt->pt_pages[i]);
-	kfree(ppgtt->pt_pages);
-	kfree(ppgtt);
+	ppgtt->cleanup(ppgtt);
 }
 
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-- 
1.7.11.7

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/4] gtt abstractions
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
                   ` (3 preceding siblings ...)
  2013-01-24 15:50 ` [PATCH 4/4] drm/i915: extract hw ppgtt setup/cleanup code Daniel Vetter
@ 2013-01-24 19:41 ` Ben Widawsky
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  6 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 19:41 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel Graphics Development

On Thu, 24 Jan 2013 16:50:11 +0100
Daniel Vetter <daniel.vetter@ffwll.ch> wrote:

> Hi all,
> 
> Whith Ben wrestling the gtt code and trying to finally wear us off the intel-gtt
> fix I've figured I might as well drop my idea for how we could better organize
> the pte writing/clearing.
> 
> With these patches there's no a clean cut between the code which manages the
> address spaces and the code which writes (global/pp) gtt ptes. Which should
> nicely pave the way for cool things to happen, like real per-process address
> spaces.
> 
> Comments highly welcome.

Patches 1, and 2 from my original RFC rebase on top of this pretty
nicely. I'd like to keep those, as it completes ripping out the.

Also, given what I know about the future, I still like the (unposted)
RFC I was working on as I think it makes everything cleaner - but you
weild the power, and I just want to get real work done.

I'll post the rebased 2 patches to this series in a few minutes.

> 
> Cheers, Daniel
> 
> Daniel Vetter (4):
>   drm/i915: vfuncs for gtt_clear_range/insert_entries
>   drm/i915: vfuncs for ppgtt
>   drm/i915: pte_encode is gen6+
>   drm/i915: extract hw ppgtt setup/cleanup code
> 
>  drivers/gpu/drm/i915/i915_drv.h     |  32 +++-
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 310 ++++++++++++++++++++----------------
>  2 files changed, 200 insertions(+), 142 deletions(-)
> 


-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
                   ` (4 preceding siblings ...)
  2013-01-24 19:41 ` [PATCH 0/4] gtt abstractions Ben Widawsky
@ 2013-01-24 21:49 ` Ben Widawsky
  2013-01-24 21:49   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
                     ` (4 more replies)
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  6 siblings, 5 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

From: Daniel Vetter <daniel.vetter@ffwll.ch>

We have a few too many differences here, so finally take the prepared
abstraction and run with it. A few smaller changes are required to get
things into shape:

- move i915_cache_level up since we need it in the gt funcs
- split up i915_ggtt_clear_range and move the two functions down to
  where the relevant insert_entries functions are
- adjustments to a few function parameter lists

Now we have 2 functions which deal with the gen6+ global gtt
(gen6_ggtt_ prefix) and 2 functions which deal with the legacy gtt
code in the intel-gtt.c fake agp driver (i915_ggtt_ prefix).

Init is still a bit a mess, but honestly I don't care about that.

One thing I've thought about while deciding on the exact interfaces is
a flag parameter for ->clear_range: We could use that to decide
between writing invalid pte entries or scratch pte entries. In case we
ever get around to fixing all our bugs which currently prevent us from
filling the gtt with empty ptes for the truly unused ranges ...

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

[bwidawsk: Moved functions to the gtt struct]
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  21 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c | 121 ++++++++++++++++++++----------------
 2 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a798ee..160c269 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -367,6 +367,12 @@ struct intel_device_info {
 	u8 has_llc:1;
 };
 
+enum i915_cache_level {
+	I915_CACHE_NONE = 0,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+};
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -388,6 +394,15 @@ struct i915_gtt {
 	bool do_idle_maps;
 	dma_addr_t scratch_page_dma;
 	struct page *scratch_page;
+
+	/* global gtt ops */
+	void (*gtt_clear_range)(struct drm_device *dev,
+				unsigned int first_entry,
+				unsigned int num_entries);
+	void (*gtt_insert_entries)(struct drm_device *dev,
+				   struct sg_table *st,
+				   unsigned int pg_start,
+				   enum i915_cache_level cache_level);
 };
 
 #define I915_PPGTT_PD_ENTRIES 512
@@ -1025,12 +1040,6 @@ enum hdmi_force_audio {
 	HDMI_AUDIO_ON,			/* force turn on HDMI audio */
 };
 
-enum i915_cache_level {
-	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC,
-	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
-};
-
 #define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
 
 struct drm_i915_gem_object_ops {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a0ba4a9..4712626 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -367,40 +367,14 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
 		dev_priv->mm.interruptible = interruptible;
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
-				 unsigned first_entry,
-				 unsigned num_entries)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	gtt_pte_t scratch_pte;
-	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
-	int i;
-
-	if (INTEL_INFO(dev)->gen < 6) {
-		intel_gtt_clear_range(first_entry, num_entries);
-		return;
-	}
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-	readl(gtt_base);
-}
-
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	i915_ggtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-			      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
+				      dev_priv->gtt.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
@@ -429,15 +403,13 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
-				  enum i915_cache_level level)
+static void gen6_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int first_entry,
+				     enum i915_cache_level level)
 {
-	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct sg_table *st = obj->pages;
 	struct scatterlist *sg = st->sgl;
-	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
 	gtt_pte_t __iomem *gtt_entries =
 		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int unused, i = 0;
@@ -453,9 +425,6 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	BUG_ON(i > max_entries);
-	BUG_ON(i != obj->base.size / PAGE_SIZE);
-
 	/* XXX: This serves as a posting read to make sure that the PTE has
 	 * actually been updated. There is some concern that even though
 	 * registers and PTEs are within the same BAR that they are potentially
@@ -473,28 +442,69 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
+static void gen6_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	gtt_pte_t scratch_pte;
+	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+	readl(gtt_base);
+}
+
+
+static void i915_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int pg_start,
+				     enum i915_cache_level cache_level)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_sg_entries(st, pg_start, flags);
+
+}
+
+static void i915_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	intel_gtt_clear_range(first_entry, num_entries);
+}
+
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	if (INTEL_INFO(dev)->gen < 6) {
-		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-		intel_gtt_insert_sg_entries(obj->pages,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    flags);
-	} else {
-		gen6_ggtt_bind_object(obj, cache_level);
-	}
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
+					 obj->gtt_space->start >> PAGE_SHIFT,
+					 cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
-	i915_ggtt_clear_range(obj->base.dev,
-			      obj->gtt_space->start >> PAGE_SHIFT,
-			      obj->base.size >> PAGE_SHIFT);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gtt.gtt_clear_range(obj->base.dev,
+				      obj->gtt_space->start >> PAGE_SHIFT,
+				      obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -570,13 +580,12 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 			     hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		i915_ggtt_clear_range(dev,
-				      hole_start / PAGE_SIZE,
-				      (hole_end-hole_start) / PAGE_SIZE);
+		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
+					      (hole_end-hole_start) / PAGE_SIZE);
 	}
 
 	/* And finally clear the reserved guard page */
-	i915_ggtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -718,6 +727,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);
 
+		dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
+		dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+
 		return 0;
 	}
 
@@ -771,6 +783,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
 
+	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
+
 	return 0;
 
 err_out:
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/6] drm/i915: vfuncs for ppgtt
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
@ 2013-01-24 21:49   ` Ben Widawsky
  2013-01-24 21:49   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

Like for the global gtt we want a notch more flexibility here. Only
big change (besides a few tiny function parameter adjustments) was to
move gen6_ppgtt_insert_entries up (and remove _sg_ from its name, we
only have one kind of insert_entries since the last gtt cleanup).

We could also extract the platform ppgtt setup/teardown code a bit
better, but I don't care that much.

With this we have the hw details of pte writing nicely hidden away
behind a bit of abstraction. Which should pave the way for
different/multiple ppgtts (e.g. what we need for real ppgtt support).

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  10 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 111 ++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 160c269..4d51c4e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -414,6 +414,15 @@ struct i915_hw_ppgtt {
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 	dma_addr_t scratch_page_dma_addr;
+
+	/* pte functions, mirroring the interface of the global gtt. */
+	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
+			       struct sg_table *st,
+			       unsigned int pg_start,
+			       enum i915_cache_level cache_level);
 };
 
 
@@ -1664,7 +1673,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
 /* i915_gem_gtt.c */
-int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4712626..f63dbc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -77,7 +77,7 @@ static inline gtt_pte_t pte_encode(struct drm_device *dev,
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
-static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
@@ -108,7 +108,51 @@ static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+				      struct sg_table *pages,
+				      unsigned first_entry,
+				      enum i915_cache_level cache_level)
+{
+	gtt_pte_t *pt_vaddr;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned i, j, m, segment_len;
+	dma_addr_t page_addr;
+	struct scatterlist *sg;
+
+	/* init sg walking */
+	sg = pages->sgl;
+	i = 0;
+	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+	m = 0;
+
+	while (i < pages->nents) {
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
+			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
+						 cache_level);
+
+			/* grab the next page */
+			if (++m == segment_len) {
+				if (++i == pages->nents)
+					break;
+
+				sg = sg_next(sg);
+				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+				m = 0;
+			}
+		}
+
+		kunmap_atomic(pt_vaddr);
+
+		first_pte = 0;
+		act_pd++;
+	}
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt;
@@ -127,6 +171,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
+	ppgtt->clear_range = gen6_ppgtt_clear_range;
+	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -159,8 +205,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
 
-	i915_ppgtt_clear_range(ppgtt, 0,
-			       ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->clear_range(ppgtt, 0,
+			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
@@ -209,66 +255,21 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	kfree(ppgtt);
 }
 
-static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
-					 const struct sg_table *pages,
-					 unsigned first_entry,
-					 enum i915_cache_level cache_level)
-{
-	gtt_pte_t *pt_vaddr;
-	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned i, j, m, segment_len;
-	dma_addr_t page_addr;
-	struct scatterlist *sg;
-
-	/* init sg walking */
-	sg = pages->sgl;
-	i = 0;
-	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-	m = 0;
-
-	while (i < pages->nents) {
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
-			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
-
-			/* grab the next page */
-			if (++m == segment_len) {
-				if (++i == pages->nents)
-					break;
-
-				sg = sg_next(sg);
-				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-				m = 0;
-			}
-		}
-
-		kunmap_atomic(pt_vaddr);
-
-		first_pte = 0;
-		act_pd++;
-	}
-}
-
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	i915_ppgtt_insert_sg_entries(ppgtt,
-				     obj->pages,
-				     obj->gtt_space->start >> PAGE_SHIFT,
-				     cache_level);
+	ppgtt->insert_entries(ppgtt, obj->pages,
+			      obj->gtt_space->start >> PAGE_SHIFT,
+			      cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	i915_ppgtt_clear_range(ppgtt,
-			       obj->gtt_space->start >> PAGE_SHIFT,
-			       obj->base.size >> PAGE_SHIFT);
+	ppgtt->clear_range(ppgtt,
+			   obj->gtt_space->start >> PAGE_SHIFT,
+			   obj->base.size >> PAGE_SHIFT);
 }
 
 void i915_gem_init_ppgtt(struct drm_device *dev)
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/6] drm/i915: pte_encode is gen6+
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  2013-01-24 21:49   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
@ 2013-01-24 21:49   ` Ben Widawsky
  2013-01-24 21:49   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

All the other gen6+ hw code has the gen6_ prefix, so be consistent
about it.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f63dbc7..d171982 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -44,9 +44,9 @@ typedef uint32_t gtt_pte_t;
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
-static inline gtt_pte_t pte_encode(struct drm_device *dev,
-				   dma_addr_t addr,
-				   enum i915_cache_level level)
+static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
+					dma_addr_t addr,
+					enum i915_cache_level level)
 {
 	gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -87,8 +87,9 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
-				 I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(ppgtt->dev,
+				      ppgtt->scratch_page_dma_addr,
+				      I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -131,8 +132,8 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
 			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
+			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
+						      cache_level);
 
 			/* grab the next page */
 			if (++m == segment_len) {
@@ -421,7 +422,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (m = 0; m < len; m++) {
 			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
+			iowrite32(gen6_pte_encode(dev, addr, level),
+				  &gtt_entries[i]);
 			i++;
 		}
 	}
@@ -433,7 +435,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));
+		WARN_ON(readl(&gtt_entries[i-1])
+			!= gen6_pte_encode(dev, addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -458,7 +461,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
+				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  2013-01-24 21:49   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
  2013-01-24 21:49   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
@ 2013-01-24 21:49   ` Ben Widawsky
  2013-01-29  7:58     ` Damien Lespiau
  2013-01-24 21:49   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
  2013-01-24 21:49   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
  4 siblings, 1 reply; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

At the moment only cosmetics, but being able to initialize/cleanup
arbitrary ppgtt address spaces paves the way to have more than one of
them ... Just in case we ever get around to implementing real
per-process address spaces. Note that in that case another vfunc for
ppgtt would be beneficial though. But that can wait until the code
grows a second place which initializes ppgtts.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4d51c4e..953ea3a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -423,6 +423,7 @@ struct i915_hw_ppgtt {
 			       struct sg_table *st,
 			       unsigned int pg_start,
 			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 };
 
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d171982..5502067 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -153,10 +153,28 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 {
+	int i;
+
+	if (ppgtt->pt_dma_addr) {
+		for (i = 0; i < ppgtt->num_pd_entries; i++)
+			pci_unmap_page(ppgtt->dev->pdev,
+				       ppgtt->pt_dma_addr[i],
+				       4096, PCI_DMA_BIDIRECTIONAL);
+	}
+
+	kfree(ppgtt->pt_dma_addr);
+	for (i = 0; i < ppgtt->num_pd_entries; i++)
+		__free_page(ppgtt->pt_pages[i]);
+	kfree(ppgtt->pt_pages);
+	kfree(ppgtt);
+}
+
+static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
 	int ret = -ENOMEM;
@@ -166,18 +184,14 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	 * now. */
 	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
 
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ret;
-
-	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
 	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->cleanup = gen6_ppgtt_cleanup;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
-		goto err_ppgtt;
+		return -ENOMEM;
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
@@ -211,8 +225,6 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
-	dev_priv->mm.aliasing_ppgtt = ppgtt;
-
 	return 0;
 
 err_pd_pin:
@@ -228,8 +240,27 @@ err_pt_alloc:
 			__free_page(ppgtt->pt_pages[i]);
 	}
 	kfree(ppgtt->pt_pages);
-err_ppgtt:
-	kfree(ppgtt);
+
+	return ret;
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return -ENOMEM;
+
+	ppgtt->dev = dev;
+
+	ret = gen6_ppgtt_init(ppgtt);
+	if (ret)
+		kfree(ppgtt);
+	else
+		dev_priv->mm.aliasing_ppgtt = ppgtt;
 
 	return ret;
 }
@@ -238,22 +269,11 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i;
 
 	if (!ppgtt)
 		return;
 
-	if (ppgtt->pt_dma_addr) {
-		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
-				       4096, PCI_DMA_BIDIRECTIONAL);
-	}
-
-	kfree(ppgtt->pt_dma_addr);
-	for (i = 0; i < ppgtt->num_pd_entries; i++)
-		__free_page(ppgtt->pt_pages[i]);
-	kfree(ppgtt->pt_pages);
-	kfree(ppgtt);
+	ppgtt->cleanup(ppgtt);
 }
 
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
                     ` (2 preceding siblings ...)
  2013-01-24 21:49   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
@ 2013-01-24 21:49   ` Ben Widawsky
  2013-01-29  8:30     ` Damien Lespiau
  2013-01-24 21:49   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
  4 siblings, 1 reply; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

The idea, and much of the code came originally from:

commit 0712f0249c3148d8cf42a3703403c278590d4de5
Author: Ben Widawsky <ben@bwidawsk.net>
Date:   Fri Jan 18 17:23:16 2013 -0800

    drm/i915: Create a vtable for i915 gtt

Daniel didn't like the color of that patch series, and so I asked him to
start something which appealed to his sense of color. The preceding
patches are those, and now this is going on top of that.

[extracted from the original commit message]

One immediately obvious thing to implement is our gmch probing. The init
function was getting massively bloated. Fundamentally, all that's needed
from GMCH probing is the GTT size, and the stolen size. It makes design
sense to put the mappable calculation in there as well, but the code
turns out a bit nicer without it (IMO)

The intel_gtt bridge thing is still here, but the subsequent patches
will finish ripping that out.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_dma.c     |   2 +-
 drivers/gpu/drm/i915/i915_drv.h     |   5 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 182 +++++++++++++++++++++---------------
 3 files changed, 110 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 11c7aa8..cf06103 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1665,7 +1665,7 @@ out_mtrrfree:
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_gmch:
-	i915_gem_gtt_fini(dev);
+	dev_priv->gtt.gtt_remove(dev);
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 953ea3a..dbf676d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -383,6 +383,7 @@ enum i915_cache_level {
 struct i915_gtt {
 	unsigned long start;		/* Start offset of used GTT */
 	size_t total;			/* Total size GTT can map */
+	size_t stolen_size;		/* Total size of stolen memory */
 
 	unsigned long mappable_end;	/* End offset that we can CPU map */
 	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
@@ -396,6 +397,9 @@ struct i915_gtt {
 	struct page *scratch_page;
 
 	/* global gtt ops */
+	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
+			  size_t *stolen);
+	void (*gtt_remove)(struct drm_device *dev);
 	void (*gtt_clear_range)(struct drm_device *dev,
 				unsigned int first_entry,
 				unsigned int num_entries);
@@ -1691,7 +1695,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
 			       unsigned long mappable_end, unsigned long end);
 int i915_gem_gtt_init(struct drm_device *dev);
-void i915_gem_gtt_fini(struct drm_device *dev);
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
 {
 	if (INTEL_INFO(dev)->gen < 6)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5502067..bfc405a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -707,14 +707,14 @@ static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 	return snb_gmch_ctl << 20;
 }
 
-static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl)
+static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
 	return snb_gmch_ctl << 25; /* 32 MB units */
 }
 
-static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
+static inline size_t gen7_get_stolen_size(u16 snb_gmch_ctl)
 {
 	static const int stolen_decoder[] = {
 		0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352};
@@ -723,109 +723,137 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
 	return stolen_decoder[snb_gmch_ctl] << 20;
 }
 
-int i915_gem_gtt_init(struct drm_device *dev)
+static int gen6_gmch_probe(struct drm_device *dev,
+			   size_t *gtt_total,
+			   size_t *stolen)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	phys_addr_t gtt_bus_addr;
+	unsigned int gtt_size;
 	u16 snb_gmch_ctl;
 	int ret;
 
-	dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2);
-	dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2);
-
-	/* On modern platforms we need not worry ourself with the legacy
-	 * hostbridge query stuff. Skip it entirely
+	/* 64/512MB is the current min/max we actually know of, but this is just
+	 * a coarse sanity check.
 	 */
-	if (INTEL_INFO(dev)->gen < 6) {
-		ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
-		if (!ret) {
-			DRM_ERROR("failed to set up gmch\n");
-			return -EIO;
-		}
-
-		dev_priv->mm.gtt = intel_gtt_get();
-		if (!dev_priv->mm.gtt) {
-			DRM_ERROR("Failed to initialize GTT\n");
-			intel_gmch_remove();
-			return -ENODEV;
-		}
-
-		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);
-
-		dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
-		dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
-
-		return 0;
+	if ((dev_priv->gtt.mappable_end < (64<<20) ||
+	     (dev_priv->gtt.mappable_end > (512<<20)))) {
+		DRM_ERROR("Unknown GMADR size (%lx)\n", dev_priv->gtt.mappable_end);
+		return -ENXIO;
 	}
 
 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
-
-	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
-	if (!dev_priv->mm.gtt)
-		return -ENOMEM;
-
-	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
-	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
-
-	/* i9xx_setup */
 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	dev_priv->mm.gtt->gtt_total_entries =
-		gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
-	if (INTEL_INFO(dev)->gen < 7)
-		dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
-	else
-		dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
 
-	/* 64/512MB is the current min/max we actually know of, but this is just a
-	 * coarse sanity check.
-	 */
-	if ((dev_priv->gtt.mappable_end < (64<<20) ||
-	    (dev_priv->gtt.mappable_end > (512<<20)))) {
-		DRM_ERROR("Unknown GMADR size (%lx)\n",
-			  dev_priv->gtt.mappable_end);
-		ret = -ENXIO;
-		goto err_out;
-	}
+	if (IS_GEN7(dev))
+		*stolen = gen7_get_stolen_size(snb_gmch_ctl);
+	else
+		*stolen = gen6_get_stolen_size(snb_gmch_ctl);
 
-	ret = setup_scratch_page(dev);
-	if (ret) {
-		DRM_ERROR("Scratch setup failed\n");
-		goto err_out;
-	}
+	*gtt_total = (gtt_size / sizeof(gtt_pte_t)) << PAGE_SHIFT;
 
-	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr,
-				       dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
+	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
+	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
+	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
 	if (!dev_priv->gtt.gsm) {
 		DRM_ERROR("Failed to map the gtt page table\n");
-		teardown_scratch_page(dev);
-		ret = -ENOMEM;
-		goto err_out;
+		return -ENOMEM;
 	}
 
-	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
-	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
-	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
-	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+	ret = setup_scratch_page(dev);
+	if (ret)
+		DRM_ERROR("Scratch setup failed\n");
 
 	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
 	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
 
-	return 0;
-
-err_out:
-	kfree(dev_priv->mm.gtt);
-	if (INTEL_INFO(dev)->gen < 6)
-		intel_gmch_remove();
 	return ret;
 }
 
-void i915_gem_gtt_fini(struct drm_device *dev)
+void gen6_gmch_remove(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	iounmap(dev_priv->gtt.gsm);
-	teardown_scratch_page(dev);
-	if (INTEL_INFO(dev)->gen < 6)
-		intel_gmch_remove();
+	teardown_scratch_page(dev_priv->dev);
+	kfree(dev_priv->mm.gtt);
+}
+
+static int i915_gmch_probe(struct drm_device *dev,
+			   size_t *gtt_total,
+			   size_t *stolen)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/* This is a temporary hack to make the code cleaner in
+	 * i915_gem_gtt_init. I promise it will go away very shortly. */
 	kfree(dev_priv->mm.gtt);
+
+	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
+	if (!ret) {
+		DRM_ERROR("failed to set up gmch\n");
+		return -EIO;
+	}
+
+	dev_priv->mm.gtt = intel_gtt_get();
+	if (!dev_priv->mm.gtt) {
+		DRM_ERROR("Failed to initialize GTT\n");
+		intel_gmch_remove();
+		return -ENODEV;
+	}
+
+	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+
+	return 0;
+}
+
+static void i915_gmch_remove(struct drm_device *dev)
+{
+	intel_gmch_remove();
+}
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_gtt *gtt = &dev_priv->gtt;
+	unsigned long gtt_size;
+	int ret;
+
+	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+	if (!dev_priv->mm.gtt)
+		return -ENOMEM;
+
+	gtt->mappable_base = pci_resource_start(dev->pdev, 2);
+	gtt->mappable_end = pci_resource_len(dev->pdev, 2);
+
+	if (INTEL_INFO(dev)->gen <= 5) {
+		dev_priv->gtt.gtt_probe = i915_gmch_probe;
+		dev_priv->gtt.gtt_remove =i915_gmch_remove;
+	} else {
+		dev_priv->gtt.gtt_probe = gen6_gmch_probe;
+		dev_priv->gtt.gtt_remove = gen6_gmch_remove;
+	}
+
+	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
+				     &dev_priv->gtt.stolen_size);
+	if (ret) {
+		kfree(dev_priv->mm.gtt);
+		return ret;
+	}
+
+	dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT;
+	dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size;
+
+	gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t);
+
+	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+	DRM_INFO("Memory usable by graphics device = %zdM\n", dev_priv->gtt.total >> 20);
+	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
+	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", dev_priv->gtt.stolen_size >> 20);
+
+	return 0;
 }
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 6/6] drm/i915: Resume dissecting intel_gtt
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
                     ` (3 preceding siblings ...)
  2013-01-24 21:49   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
@ 2013-01-24 21:49   ` Ben Widawsky
  4 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 21:49 UTC (permalink / raw)
  To: Intel, GFX <intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

With the probe call in our dispatch table, we can now cut away two of
the remaining members in the intel_gtt shared struct.

v2: Rebased on top of Daniel's series

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/char/agp/intel-gtt.c           | 36 ++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_drv.h        |  3 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 30 ++++++----------------------
 drivers/gpu/drm/i915/i915_gem_stolen.c |  8 ++++----
 include/drm/intel-gtt.h                | 10 +---------
 5 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index ff5f348..d8e7e6c 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -60,7 +60,6 @@ struct intel_gtt_driver {
 };
 
 static struct _intel_private {
-	struct intel_gtt base;
 	const struct intel_gtt_driver *driver;
 	struct pci_dev *pcidev;	/* device one */
 	struct pci_dev *bridge_dev;
@@ -80,6 +79,13 @@ static struct _intel_private {
 	/* Whether i915 needs to use the dmar apis or not. */
 	unsigned int needs_dmar : 1;
 	phys_addr_t gma_bus_addr;
+	/*  Size of memory reserved for graphics by the BIOS */
+	unsigned int stolen_size;
+	/* Total number of gtt entries. */
+	unsigned int gtt_total_entries;
+	/* Part of the gtt that is mappable by the cpu, for those chips where
+	 * this is not the full gtt. */
+	unsigned int gtt_mappable_entries;
 } intel_private;
 
 #define INTEL_GTT_GEN	intel_private.driver->gen
@@ -510,7 +516,7 @@ static unsigned int intel_gtt_total_entries(void)
 		/* On previous hardware, the GTT size was just what was
 		 * required to map the aperture.
 		 */
-		return intel_private.base.gtt_mappable_entries;
+		return intel_private.gtt_mappable_entries;
 	}
 }
 
@@ -576,8 +582,8 @@ static int intel_gtt_init(void)
 	if (ret != 0)
 		return ret;
 
-	intel_private.base.gtt_mappable_entries = intel_gtt_mappable_entries();
-	intel_private.base.gtt_total_entries = intel_gtt_total_entries();
+	intel_private.gtt_mappable_entries = intel_gtt_mappable_entries();
+	intel_private.gtt_total_entries = intel_gtt_total_entries();
 
 	/* save the PGETBL reg for resume */
 	intel_private.PGETBL_save =
@@ -589,10 +595,10 @@ static int intel_gtt_init(void)
 
 	dev_info(&intel_private.bridge_dev->dev,
 			"detected gtt size: %dK total, %dK mappable\n",
-			intel_private.base.gtt_total_entries * 4,
-			intel_private.base.gtt_mappable_entries * 4);
+			intel_private.gtt_total_entries * 4,
+			intel_private.gtt_mappable_entries * 4);
 
-	gtt_map_size = intel_private.base.gtt_total_entries * 4;
+	gtt_map_size = intel_private.gtt_total_entries * 4;
 
 	intel_private.gtt = NULL;
 	if (INTEL_GTT_GEN < 6 && INTEL_GTT_GEN > 2)
@@ -609,7 +615,7 @@ static int intel_gtt_init(void)
 
 	global_cache_flush();   /* FIXME: ? */
 
-	intel_private.base.stolen_size = intel_gtt_stolen_size();
+	intel_private.stolen_size = intel_gtt_stolen_size();
 
 	intel_private.needs_dmar = USE_PCI_DMA_API && INTEL_GTT_GEN > 2;
 
@@ -637,8 +643,7 @@ static int intel_fake_agp_fetch_size(void)
 	unsigned int aper_size;
 	int i;
 
-	aper_size = (intel_private.base.gtt_mappable_entries << PAGE_SHIFT)
-		    / MB(1);
+	aper_size = (intel_private.gtt_mappable_entries << PAGE_SHIFT) / MB(1);
 
 	for (i = 0; i < num_sizes; i++) {
 		if (aper_size == intel_fake_agp_sizes[i].size) {
@@ -845,8 +850,8 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	int ret = -EINVAL;
 
 	if (intel_private.clear_fake_agp) {
-		int start = intel_private.base.stolen_size / PAGE_SIZE;
-		int end = intel_private.base.gtt_mappable_entries;
+		int start = intel_private.stolen_size / PAGE_SIZE;
+		int end = intel_private.gtt_mappable_entries;
 		intel_gtt_clear_range(start, end - start);
 		intel_private.clear_fake_agp = false;
 	}
@@ -857,7 +862,7 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		goto out;
 
-	if (pg_start + mem->page_count > intel_private.base.gtt_total_entries)
+	if (pg_start + mem->page_count > intel_private.gtt_total_entries)
 		goto out_err;
 
 	if (type != mem->type)
@@ -1366,9 +1371,10 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 }
 EXPORT_SYMBOL(intel_gmch_probe);
 
-struct intel_gtt *intel_gtt_get(void)
+void intel_gtt_get(size_t *gtt_total, size_t *stolen_size)
 {
-	return &intel_private.base;
+	*gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT;
+	*stolen_size = intel_private.stolen_size;
 }
 EXPORT_SYMBOL(intel_gtt_get);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dbf676d..35f171e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -408,6 +408,7 @@ struct i915_gtt {
 				   unsigned int pg_start,
 				   enum i915_cache_level cache_level);
 };
+#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
 
 #define I915_PPGTT_PD_ENTRIES 512
 #define I915_PPGTT_PT_ENTRIES 1024
@@ -698,8 +699,6 @@ struct intel_l3_parity {
 };
 
 struct i915_gem_mm {
-	/** Bridge to intel-gtt-ko */
-	struct intel_gtt *gtt;
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
 	/** Memory allocator for GTT */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bfc405a..2953ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -182,7 +182,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
 	 * entries. For aliasing ppgtt support we just steal them at the end for
 	 * now. */
-	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
+	first_pd_entry_in_global_pt =
+		gtt_total_entries(dev_priv->gtt) - I915_PPGTT_PD_ENTRIES;
 
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
@@ -473,7 +474,7 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	gtt_pte_t scratch_pte;
 	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
 	int i;
 
 	if (WARN(num_entries > max_entries,
@@ -634,7 +635,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	unsigned long gtt_size, mappable_size;
 	int ret;
 
-	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
+	gtt_size = dev_priv->gtt.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
@@ -777,7 +778,6 @@ void gen6_gmch_remove(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	iounmap(dev_priv->gtt.gsm);
 	teardown_scratch_page(dev_priv->dev);
-	kfree(dev_priv->mm.gtt);
 }
 
 static int i915_gmch_probe(struct drm_device *dev,
@@ -787,22 +787,13 @@ static int i915_gmch_probe(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	/* This is a temporary hack to make the code cleaner in
-	 * i915_gem_gtt_init. I promise it will go away very shortly. */
-	kfree(dev_priv->mm.gtt);
-
 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
 	if (!ret) {
 		DRM_ERROR("failed to set up gmch\n");
 		return -EIO;
 	}
 
-	dev_priv->mm.gtt = intel_gtt_get();
-	if (!dev_priv->mm.gtt) {
-		DRM_ERROR("Failed to initialize GTT\n");
-		intel_gmch_remove();
-		return -ENODEV;
-	}
+	intel_gtt_get(gtt_total, stolen);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
@@ -823,10 +814,6 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	unsigned long gtt_size;
 	int ret;
 
-	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
-	if (!dev_priv->mm.gtt)
-		return -ENOMEM;
-
 	gtt->mappable_base = pci_resource_start(dev->pdev, 2);
 	gtt->mappable_end = pci_resource_len(dev->pdev, 2);
 
@@ -840,13 +827,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
 				     &dev_priv->gtt.stolen_size);
-	if (ret) {
-		kfree(dev_priv->mm.gtt);
+	if (ret)
 		return ret;
-	}
-
-	dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT;
-	dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size;
 
 	gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f21ae17..69d97cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -187,11 +187,11 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	if (dev_priv->mm.stolen_base == 0)
 		return 0;
 
-	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
-		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
+	DRM_DEBUG_KMS("found %zd bytes of stolen memory at %08lx\n",
+		      dev_priv->gtt.stolen_size, dev_priv->mm.stolen_base);
 
 	/* Basic memrange allocator for stolen space */
-	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size);
 
 	return 0;
 }
@@ -205,7 +205,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
 	struct scatterlist *sg;
 
 	DRM_DEBUG_DRIVER("offset=0x%x, size=%d\n", offset, size);
-	BUG_ON(offset > dev_priv->mm.gtt->stolen_size - size);
+	BUG_ON(offset > dev_priv->gtt.stolen_size - size);
 
 	/* We hide that we have no struct page backing our stolen object
 	 * by wrapping the contiguous physical allocation with a fake
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 769b6c7..cf10555 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -3,15 +3,7 @@
 #ifndef _DRM_INTEL_GTT_H
 #define	_DRM_INTEL_GTT_H
 
-struct intel_gtt {
-	/* Size of memory reserved for graphics by the BIOS */
-	unsigned int stolen_size;
-	/* Total number of gtt entries. */
-	unsigned int gtt_total_entries;
-	/* Part of the gtt that is mappable by the cpu, for those chips where
-	 * this is not the full gtt. */
-	unsigned int gtt_mappable_entries;
-} *intel_gtt_get(void);
+void intel_gtt_get(size_t *gtt_total, size_t *stolen_size);
 
 int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 		     struct agp_bridge_data *bridge);
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries
  2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
                   ` (5 preceding siblings ...)
  2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
@ 2013-01-24 22:44 ` Ben Widawsky
  2013-01-24 22:44   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
                     ` (5 more replies)
  6 siblings, 6 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:44 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter, Ben Widawsky

From: Daniel Vetter <daniel.vetter@ffwll.ch>

We have a few too many differences here, so finally take the prepared
abstraction and run with it. A few smaller changes are required to get
things into shape:

- move i915_cache_level up since we need it in the gt funcs
- split up i915_ggtt_clear_range and move the two functions down to
  where the relevant insert_entries functions are
- adjustments to a few function parameter lists

Now we have 2 functions which deal with the gen6+ global gtt
(gen6_ggtt_ prefix) and 2 functions which deal with the legacy gtt
code in the intel-gtt.c fake agp driver (i915_ggtt_ prefix).

Init is still a bit a mess, but honestly I don't care about that.

One thing I've thought about while deciding on the exact interfaces is
a flag parameter for ->clear_range: We could use that to decide
between writing invalid pte entries or scratch pte entries. In case we
ever get around to fixing all our bugs which currently prevent us from
filling the gtt with empty ptes for the truly unused ranges ...

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

[bwidawsk: Moved functions to the gtt struct]
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  21 +++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c | 121 ++++++++++++++++++++----------------
 2 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a798ee..160c269 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -367,6 +367,12 @@ struct intel_device_info {
 	u8 has_llc:1;
 };
 
+enum i915_cache_level {
+	I915_CACHE_NONE = 0,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+};
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -388,6 +394,15 @@ struct i915_gtt {
 	bool do_idle_maps;
 	dma_addr_t scratch_page_dma;
 	struct page *scratch_page;
+
+	/* global gtt ops */
+	void (*gtt_clear_range)(struct drm_device *dev,
+				unsigned int first_entry,
+				unsigned int num_entries);
+	void (*gtt_insert_entries)(struct drm_device *dev,
+				   struct sg_table *st,
+				   unsigned int pg_start,
+				   enum i915_cache_level cache_level);
 };
 
 #define I915_PPGTT_PD_ENTRIES 512
@@ -1025,12 +1040,6 @@ enum hdmi_force_audio {
 	HDMI_AUDIO_ON,			/* force turn on HDMI audio */
 };
 
-enum i915_cache_level {
-	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC,
-	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
-};
-
 #define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
 
 struct drm_i915_gem_object_ops {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a0ba4a9..4712626 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -367,40 +367,14 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
 		dev_priv->mm.interruptible = interruptible;
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
-				 unsigned first_entry,
-				 unsigned num_entries)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	gtt_pte_t scratch_pte;
-	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
-	int i;
-
-	if (INTEL_INFO(dev)->gen < 6) {
-		intel_gtt_clear_range(first_entry, num_entries);
-		return;
-	}
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-	readl(gtt_base);
-}
-
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	i915_ggtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-			      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
+				      dev_priv->gtt.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
@@ -429,15 +403,13 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
-				  enum i915_cache_level level)
+static void gen6_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int first_entry,
+				     enum i915_cache_level level)
 {
-	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct sg_table *st = obj->pages;
 	struct scatterlist *sg = st->sgl;
-	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
 	gtt_pte_t __iomem *gtt_entries =
 		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int unused, i = 0;
@@ -453,9 +425,6 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	BUG_ON(i > max_entries);
-	BUG_ON(i != obj->base.size / PAGE_SIZE);
-
 	/* XXX: This serves as a posting read to make sure that the PTE has
 	 * actually been updated. There is some concern that even though
 	 * registers and PTEs are within the same BAR that they are potentially
@@ -473,28 +442,69 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
+static void gen6_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	gtt_pte_t scratch_pte;
+	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+	readl(gtt_base);
+}
+
+
+static void i915_ggtt_insert_entries(struct drm_device *dev,
+				     struct sg_table *st,
+				     unsigned int pg_start,
+				     enum i915_cache_level cache_level)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_sg_entries(st, pg_start, flags);
+
+}
+
+static void i915_ggtt_clear_range(struct drm_device *dev,
+				  unsigned int first_entry,
+				  unsigned int num_entries)
+{
+	intel_gtt_clear_range(first_entry, num_entries);
+}
+
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	if (INTEL_INFO(dev)->gen < 6) {
-		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-		intel_gtt_insert_sg_entries(obj->pages,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    flags);
-	} else {
-		gen6_ggtt_bind_object(obj, cache_level);
-	}
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
+					 obj->gtt_space->start >> PAGE_SHIFT,
+					 cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
-	i915_ggtt_clear_range(obj->base.dev,
-			      obj->gtt_space->start >> PAGE_SHIFT,
-			      obj->base.size >> PAGE_SHIFT);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gtt.gtt_clear_range(obj->base.dev,
+				      obj->gtt_space->start >> PAGE_SHIFT,
+				      obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -570,13 +580,12 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 			     hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		i915_ggtt_clear_range(dev,
-				      hole_start / PAGE_SIZE,
-				      (hole_end-hole_start) / PAGE_SIZE);
+		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
+					      (hole_end-hole_start) / PAGE_SIZE);
 	}
 
 	/* And finally clear the reserved guard page */
-	i915_ggtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -718,6 +727,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);
 
+		dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
+		dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+
 		return 0;
 	}
 
@@ -771,6 +783,9 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
 
+	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
+
 	return 0;
 
 err_out:
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/6] drm/i915: vfuncs for ppgtt
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
@ 2013-01-24 22:44   ` Ben Widawsky
  2013-01-29  7:44     ` Damien Lespiau
  2013-01-24 22:44   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:44 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

Like for the global gtt we want a notch more flexibility here. Only
big change (besides a few tiny function parameter adjustments) was to
move gen6_ppgtt_insert_entries up (and remove _sg_ from its name, we
only have one kind of insert_entries since the last gtt cleanup).

We could also extract the platform ppgtt setup/teardown code a bit
better, but I don't care that much.

With this we have the hw details of pte writing nicely hidden away
behind a bit of abstraction. Which should pave the way for
different/multiple ppgtts (e.g. what we need for real ppgtt support).

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  10 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 111 ++++++++++++++++++------------------
 2 files changed, 65 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 160c269..4d51c4e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -414,6 +414,15 @@ struct i915_hw_ppgtt {
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 	dma_addr_t scratch_page_dma_addr;
+
+	/* pte functions, mirroring the interface of the global gtt. */
+	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
+			       struct sg_table *st,
+			       unsigned int pg_start,
+			       enum i915_cache_level cache_level);
 };
 
 
@@ -1664,7 +1673,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
 /* i915_gem_gtt.c */
-int __must_check i915_gem_init_aliasing_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4712626..f63dbc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -77,7 +77,7 @@ static inline gtt_pte_t pte_encode(struct drm_device *dev,
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
-static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
@@ -108,7 +108,51 @@ static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+				      struct sg_table *pages,
+				      unsigned first_entry,
+				      enum i915_cache_level cache_level)
+{
+	gtt_pte_t *pt_vaddr;
+	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
+	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned i, j, m, segment_len;
+	dma_addr_t page_addr;
+	struct scatterlist *sg;
+
+	/* init sg walking */
+	sg = pages->sgl;
+	i = 0;
+	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+	m = 0;
+
+	while (i < pages->nents) {
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+
+		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
+			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
+						 cache_level);
+
+			/* grab the next page */
+			if (++m == segment_len) {
+				if (++i == pages->nents)
+					break;
+
+				sg = sg_next(sg);
+				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
+				m = 0;
+			}
+		}
+
+		kunmap_atomic(pt_vaddr);
+
+		first_pte = 0;
+		act_pd++;
+	}
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt;
@@ -127,6 +171,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
+	ppgtt->clear_range = gen6_ppgtt_clear_range;
+	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -159,8 +205,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
 
-	i915_ppgtt_clear_range(ppgtt, 0,
-			       ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->clear_range(ppgtt, 0,
+			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
@@ -209,66 +255,21 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	kfree(ppgtt);
 }
 
-static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
-					 const struct sg_table *pages,
-					 unsigned first_entry,
-					 enum i915_cache_level cache_level)
-{
-	gtt_pte_t *pt_vaddr;
-	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned i, j, m, segment_len;
-	dma_addr_t page_addr;
-	struct scatterlist *sg;
-
-	/* init sg walking */
-	sg = pages->sgl;
-	i = 0;
-	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-	m = 0;
-
-	while (i < pages->nents) {
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
-			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
-
-			/* grab the next page */
-			if (++m == segment_len) {
-				if (++i == pages->nents)
-					break;
-
-				sg = sg_next(sg);
-				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-				m = 0;
-			}
-		}
-
-		kunmap_atomic(pt_vaddr);
-
-		first_pte = 0;
-		act_pd++;
-	}
-}
-
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	i915_ppgtt_insert_sg_entries(ppgtt,
-				     obj->pages,
-				     obj->gtt_space->start >> PAGE_SHIFT,
-				     cache_level);
+	ppgtt->insert_entries(ppgtt, obj->pages,
+			      obj->gtt_space->start >> PAGE_SHIFT,
+			      cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	i915_ppgtt_clear_range(ppgtt,
-			       obj->gtt_space->start >> PAGE_SHIFT,
-			       obj->base.size >> PAGE_SHIFT);
+	ppgtt->clear_range(ppgtt,
+			   obj->gtt_space->start >> PAGE_SHIFT,
+			   obj->base.size >> PAGE_SHIFT);
 }
 
 void i915_gem_init_ppgtt(struct drm_device *dev)
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/6] drm/i915: pte_encode is gen6+
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  2013-01-24 22:44   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
@ 2013-01-24 22:44   ` Ben Widawsky
  2013-01-29  7:46     ` Damien Lespiau
  2013-01-24 22:44   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:44 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

All the other gen6+ hw code has the gen6_ prefix, so be consistent
about it.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f63dbc7..d171982 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -44,9 +44,9 @@ typedef uint32_t gtt_pte_t;
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
-static inline gtt_pte_t pte_encode(struct drm_device *dev,
-				   dma_addr_t addr,
-				   enum i915_cache_level level)
+static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
+					dma_addr_t addr,
+					enum i915_cache_level level)
 {
 	gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
@@ -87,8 +87,9 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
-				 I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(ppgtt->dev,
+				      ppgtt->scratch_page_dma_addr,
+				      I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -131,8 +132,8 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
 			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
-						 cache_level);
+			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
+						      cache_level);
 
 			/* grab the next page */
 			if (++m == segment_len) {
@@ -421,7 +422,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (m = 0; m < len; m++) {
 			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
+			iowrite32(gen6_pte_encode(dev, addr, level),
+				  &gtt_entries[i]);
 			i++;
 		}
 	}
@@ -433,7 +435,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));
+		WARN_ON(readl(&gtt_entries[i-1])
+			!= gen6_pte_encode(dev, addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -458,7 +461,8 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = pte_encode(dev, dev_priv->gtt.scratch_page_dma, I915_CACHE_LLC);
+	scratch_pte = gen6_pte_encode(dev, dev_priv->gtt.scratch_page_dma,
+				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
  2013-01-24 22:44   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
  2013-01-24 22:44   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
@ 2013-01-24 22:44   ` Ben Widawsky
  2013-01-24 22:44   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:44 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

At the moment only cosmetics, but being able to initialize/cleanup
arbitrary ppgtt address spaces paves the way to have more than one of
them ... Just in case we ever get around to implementing real
per-process address spaces. Note that in that case another vfunc for
ppgtt would be beneficial though. But that can wait until the code
grows a second place which initializes ppgtts.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4d51c4e..953ea3a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -423,6 +423,7 @@ struct i915_hw_ppgtt {
 			       struct sg_table *st,
 			       unsigned int pg_start,
 			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 };
 
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d171982..5502067 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -153,10 +153,28 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 {
+	int i;
+
+	if (ppgtt->pt_dma_addr) {
+		for (i = 0; i < ppgtt->num_pd_entries; i++)
+			pci_unmap_page(ppgtt->dev->pdev,
+				       ppgtt->pt_dma_addr[i],
+				       4096, PCI_DMA_BIDIRECTIONAL);
+	}
+
+	kfree(ppgtt->pt_dma_addr);
+	for (i = 0; i < ppgtt->num_pd_entries; i++)
+		__free_page(ppgtt->pt_pages[i]);
+	kfree(ppgtt->pt_pages);
+	kfree(ppgtt);
+}
+
+static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
 	int ret = -ENOMEM;
@@ -166,18 +184,14 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	 * now. */
 	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
 
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ret;
-
-	ppgtt->dev = dev;
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
 	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->cleanup = gen6_ppgtt_cleanup;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
-		goto err_ppgtt;
+		return -ENOMEM;
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
@@ -211,8 +225,6 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
 
-	dev_priv->mm.aliasing_ppgtt = ppgtt;
-
 	return 0;
 
 err_pd_pin:
@@ -228,8 +240,27 @@ err_pt_alloc:
 			__free_page(ppgtt->pt_pages[i]);
 	}
 	kfree(ppgtt->pt_pages);
-err_ppgtt:
-	kfree(ppgtt);
+
+	return ret;
+}
+
+static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return -ENOMEM;
+
+	ppgtt->dev = dev;
+
+	ret = gen6_ppgtt_init(ppgtt);
+	if (ret)
+		kfree(ppgtt);
+	else
+		dev_priv->mm.aliasing_ppgtt = ppgtt;
 
 	return ret;
 }
@@ -238,22 +269,11 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i;
 
 	if (!ppgtt)
 		return;
 
-	if (ppgtt->pt_dma_addr) {
-		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
-				       4096, PCI_DMA_BIDIRECTIONAL);
-	}
-
-	kfree(ppgtt->pt_dma_addr);
-	for (i = 0; i < ppgtt->num_pd_entries; i++)
-		__free_page(ppgtt->pt_pages[i]);
-	kfree(ppgtt->pt_pages);
-	kfree(ppgtt);
+	ppgtt->cleanup(ppgtt);
 }
 
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
                     ` (2 preceding siblings ...)
  2013-01-24 22:44   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
@ 2013-01-24 22:44   ` Ben Widawsky
  2013-01-24 22:45   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
  2013-01-29  7:42   ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Damien Lespiau
  5 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:44 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter, Ben Widawsky

The idea, and much of the code came originally from:

commit 0712f0249c3148d8cf42a3703403c278590d4de5
Author: Ben Widawsky <ben@bwidawsk.net>
Date:   Fri Jan 18 17:23:16 2013 -0800

    drm/i915: Create a vtable for i915 gtt

Daniel didn't like the color of that patch series, and so I asked him to
start something which appealed to his sense of color. The preceding
patches are those, and now this is going on top of that.

[extracted from the original commit message]

One immediately obvious thing to implement is our gmch probing. The init
function was getting massively bloated. Fundamentally, all that's needed
from GMCH probing is the GTT size, and the stolen size. It makes design
sense to put the mappable calculation in there as well, but the code
turns out a bit nicer without it (IMO)

The intel_gtt bridge thing is still here, but the subsequent patches
will finish ripping that out.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_dma.c     |   2 +-
 drivers/gpu/drm/i915/i915_drv.h     |   5 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 182 +++++++++++++++++++++---------------
 3 files changed, 110 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 11c7aa8..cf06103 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1665,7 +1665,7 @@ out_mtrrfree:
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_gmch:
-	i915_gem_gtt_fini(dev);
+	dev_priv->gtt.gtt_remove(dev);
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 953ea3a..dbf676d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -383,6 +383,7 @@ enum i915_cache_level {
 struct i915_gtt {
 	unsigned long start;		/* Start offset of used GTT */
 	size_t total;			/* Total size GTT can map */
+	size_t stolen_size;		/* Total size of stolen memory */
 
 	unsigned long mappable_end;	/* End offset that we can CPU map */
 	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
@@ -396,6 +397,9 @@ struct i915_gtt {
 	struct page *scratch_page;
 
 	/* global gtt ops */
+	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
+			  size_t *stolen);
+	void (*gtt_remove)(struct drm_device *dev);
 	void (*gtt_clear_range)(struct drm_device *dev,
 				unsigned int first_entry,
 				unsigned int num_entries);
@@ -1691,7 +1695,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
 			       unsigned long mappable_end, unsigned long end);
 int i915_gem_gtt_init(struct drm_device *dev);
-void i915_gem_gtt_fini(struct drm_device *dev);
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
 {
 	if (INTEL_INFO(dev)->gen < 6)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5502067..bfc405a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -707,14 +707,14 @@ static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 	return snb_gmch_ctl << 20;
 }
 
-static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl)
+static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
 	return snb_gmch_ctl << 25; /* 32 MB units */
 }
 
-static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
+static inline size_t gen7_get_stolen_size(u16 snb_gmch_ctl)
 {
 	static const int stolen_decoder[] = {
 		0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352};
@@ -723,109 +723,137 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
 	return stolen_decoder[snb_gmch_ctl] << 20;
 }
 
-int i915_gem_gtt_init(struct drm_device *dev)
+static int gen6_gmch_probe(struct drm_device *dev,
+			   size_t *gtt_total,
+			   size_t *stolen)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	phys_addr_t gtt_bus_addr;
+	unsigned int gtt_size;
 	u16 snb_gmch_ctl;
 	int ret;
 
-	dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2);
-	dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2);
-
-	/* On modern platforms we need not worry ourself with the legacy
-	 * hostbridge query stuff. Skip it entirely
+	/* 64/512MB is the current min/max we actually know of, but this is just
+	 * a coarse sanity check.
 	 */
-	if (INTEL_INFO(dev)->gen < 6) {
-		ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
-		if (!ret) {
-			DRM_ERROR("failed to set up gmch\n");
-			return -EIO;
-		}
-
-		dev_priv->mm.gtt = intel_gtt_get();
-		if (!dev_priv->mm.gtt) {
-			DRM_ERROR("Failed to initialize GTT\n");
-			intel_gmch_remove();
-			return -ENODEV;
-		}
-
-		dev_priv->gtt.do_idle_maps = needs_idle_maps(dev);
-
-		dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
-		dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
-
-		return 0;
+	if ((dev_priv->gtt.mappable_end < (64<<20) ||
+	     (dev_priv->gtt.mappable_end > (512<<20)))) {
+		DRM_ERROR("Unknown GMADR size (%lx)\n", dev_priv->gtt.mappable_end);
+		return -ENXIO;
 	}
 
 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
-
-	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
-	if (!dev_priv->mm.gtt)
-		return -ENOMEM;
-
-	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
-	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
-
-	/* i9xx_setup */
 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	dev_priv->mm.gtt->gtt_total_entries =
-		gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
-	if (INTEL_INFO(dev)->gen < 7)
-		dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
-	else
-		dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
 
-	/* 64/512MB is the current min/max we actually know of, but this is just a
-	 * coarse sanity check.
-	 */
-	if ((dev_priv->gtt.mappable_end < (64<<20) ||
-	    (dev_priv->gtt.mappable_end > (512<<20)))) {
-		DRM_ERROR("Unknown GMADR size (%lx)\n",
-			  dev_priv->gtt.mappable_end);
-		ret = -ENXIO;
-		goto err_out;
-	}
+	if (IS_GEN7(dev))
+		*stolen = gen7_get_stolen_size(snb_gmch_ctl);
+	else
+		*stolen = gen6_get_stolen_size(snb_gmch_ctl);
 
-	ret = setup_scratch_page(dev);
-	if (ret) {
-		DRM_ERROR("Scratch setup failed\n");
-		goto err_out;
-	}
+	*gtt_total = (gtt_size / sizeof(gtt_pte_t)) << PAGE_SHIFT;
 
-	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr,
-				       dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
+	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
+	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
+	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
 	if (!dev_priv->gtt.gsm) {
 		DRM_ERROR("Failed to map the gtt page table\n");
-		teardown_scratch_page(dev);
-		ret = -ENOMEM;
-		goto err_out;
+		return -ENOMEM;
 	}
 
-	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
-	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
-	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
-	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+	ret = setup_scratch_page(dev);
+	if (ret)
+		DRM_ERROR("Scratch setup failed\n");
 
 	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
 	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
 
-	return 0;
-
-err_out:
-	kfree(dev_priv->mm.gtt);
-	if (INTEL_INFO(dev)->gen < 6)
-		intel_gmch_remove();
 	return ret;
 }
 
-void i915_gem_gtt_fini(struct drm_device *dev)
+void gen6_gmch_remove(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	iounmap(dev_priv->gtt.gsm);
-	teardown_scratch_page(dev);
-	if (INTEL_INFO(dev)->gen < 6)
-		intel_gmch_remove();
+	teardown_scratch_page(dev_priv->dev);
+	kfree(dev_priv->mm.gtt);
+}
+
+static int i915_gmch_probe(struct drm_device *dev,
+			   size_t *gtt_total,
+			   size_t *stolen)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/* This is a temporary hack to make the code cleaner in
+	 * i915_gem_gtt_init. I promise it will go away very shortly. */
 	kfree(dev_priv->mm.gtt);
+
+	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
+	if (!ret) {
+		DRM_ERROR("failed to set up gmch\n");
+		return -EIO;
+	}
+
+	dev_priv->mm.gtt = intel_gtt_get();
+	if (!dev_priv->mm.gtt) {
+		DRM_ERROR("Failed to initialize GTT\n");
+		intel_gmch_remove();
+		return -ENODEV;
+	}
+
+	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+
+	return 0;
+}
+
+static void i915_gmch_remove(struct drm_device *dev)
+{
+	intel_gmch_remove();
+}
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_gtt *gtt = &dev_priv->gtt;
+	unsigned long gtt_size;
+	int ret;
+
+	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+	if (!dev_priv->mm.gtt)
+		return -ENOMEM;
+
+	gtt->mappable_base = pci_resource_start(dev->pdev, 2);
+	gtt->mappable_end = pci_resource_len(dev->pdev, 2);
+
+	if (INTEL_INFO(dev)->gen <= 5) {
+		dev_priv->gtt.gtt_probe = i915_gmch_probe;
+		dev_priv->gtt.gtt_remove =i915_gmch_remove;
+	} else {
+		dev_priv->gtt.gtt_probe = gen6_gmch_probe;
+		dev_priv->gtt.gtt_remove = gen6_gmch_remove;
+	}
+
+	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
+				     &dev_priv->gtt.stolen_size);
+	if (ret) {
+		kfree(dev_priv->mm.gtt);
+		return ret;
+	}
+
+	dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT;
+	dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size;
+
+	gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t);
+
+	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+	DRM_INFO("Memory usable by graphics device = %zdM\n", dev_priv->gtt.total >> 20);
+	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20);
+	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", dev_priv->gtt.stolen_size >> 20);
+
+	return 0;
 }
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 6/6] drm/i915: Resume dissecting intel_gtt
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
                     ` (3 preceding siblings ...)
  2013-01-24 22:44   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
@ 2013-01-24 22:45   ` Ben Widawsky
  2013-01-29  8:40     ` Damien Lespiau
  2013-01-29  7:42   ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Damien Lespiau
  5 siblings, 1 reply; 26+ messages in thread
From: Ben Widawsky @ 2013-01-24 22:45 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter, Ben Widawsky

With the probe call in our dispatch table, we can now cut away two of
the remaining members in the intel_gtt shared struct.

v2: Rebased on top of Daniel's series

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/char/agp/intel-gtt.c           | 36 ++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_drv.h        |  3 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 30 ++++++----------------------
 drivers/gpu/drm/i915/i915_gem_stolen.c |  8 ++++----
 include/drm/intel-gtt.h                | 10 +---------
 5 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index ff5f348..d8e7e6c 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -60,7 +60,6 @@ struct intel_gtt_driver {
 };
 
 static struct _intel_private {
-	struct intel_gtt base;
 	const struct intel_gtt_driver *driver;
 	struct pci_dev *pcidev;	/* device one */
 	struct pci_dev *bridge_dev;
@@ -80,6 +79,13 @@ static struct _intel_private {
 	/* Whether i915 needs to use the dmar apis or not. */
 	unsigned int needs_dmar : 1;
 	phys_addr_t gma_bus_addr;
+	/*  Size of memory reserved for graphics by the BIOS */
+	unsigned int stolen_size;
+	/* Total number of gtt entries. */
+	unsigned int gtt_total_entries;
+	/* Part of the gtt that is mappable by the cpu, for those chips where
+	 * this is not the full gtt. */
+	unsigned int gtt_mappable_entries;
 } intel_private;
 
 #define INTEL_GTT_GEN	intel_private.driver->gen
@@ -510,7 +516,7 @@ static unsigned int intel_gtt_total_entries(void)
 		/* On previous hardware, the GTT size was just what was
 		 * required to map the aperture.
 		 */
-		return intel_private.base.gtt_mappable_entries;
+		return intel_private.gtt_mappable_entries;
 	}
 }
 
@@ -576,8 +582,8 @@ static int intel_gtt_init(void)
 	if (ret != 0)
 		return ret;
 
-	intel_private.base.gtt_mappable_entries = intel_gtt_mappable_entries();
-	intel_private.base.gtt_total_entries = intel_gtt_total_entries();
+	intel_private.gtt_mappable_entries = intel_gtt_mappable_entries();
+	intel_private.gtt_total_entries = intel_gtt_total_entries();
 
 	/* save the PGETBL reg for resume */
 	intel_private.PGETBL_save =
@@ -589,10 +595,10 @@ static int intel_gtt_init(void)
 
 	dev_info(&intel_private.bridge_dev->dev,
 			"detected gtt size: %dK total, %dK mappable\n",
-			intel_private.base.gtt_total_entries * 4,
-			intel_private.base.gtt_mappable_entries * 4);
+			intel_private.gtt_total_entries * 4,
+			intel_private.gtt_mappable_entries * 4);
 
-	gtt_map_size = intel_private.base.gtt_total_entries * 4;
+	gtt_map_size = intel_private.gtt_total_entries * 4;
 
 	intel_private.gtt = NULL;
 	if (INTEL_GTT_GEN < 6 && INTEL_GTT_GEN > 2)
@@ -609,7 +615,7 @@ static int intel_gtt_init(void)
 
 	global_cache_flush();   /* FIXME: ? */
 
-	intel_private.base.stolen_size = intel_gtt_stolen_size();
+	intel_private.stolen_size = intel_gtt_stolen_size();
 
 	intel_private.needs_dmar = USE_PCI_DMA_API && INTEL_GTT_GEN > 2;
 
@@ -637,8 +643,7 @@ static int intel_fake_agp_fetch_size(void)
 	unsigned int aper_size;
 	int i;
 
-	aper_size = (intel_private.base.gtt_mappable_entries << PAGE_SHIFT)
-		    / MB(1);
+	aper_size = (intel_private.gtt_mappable_entries << PAGE_SHIFT) / MB(1);
 
 	for (i = 0; i < num_sizes; i++) {
 		if (aper_size == intel_fake_agp_sizes[i].size) {
@@ -845,8 +850,8 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	int ret = -EINVAL;
 
 	if (intel_private.clear_fake_agp) {
-		int start = intel_private.base.stolen_size / PAGE_SIZE;
-		int end = intel_private.base.gtt_mappable_entries;
+		int start = intel_private.stolen_size / PAGE_SIZE;
+		int end = intel_private.gtt_mappable_entries;
 		intel_gtt_clear_range(start, end - start);
 		intel_private.clear_fake_agp = false;
 	}
@@ -857,7 +862,7 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		goto out;
 
-	if (pg_start + mem->page_count > intel_private.base.gtt_total_entries)
+	if (pg_start + mem->page_count > intel_private.gtt_total_entries)
 		goto out_err;
 
 	if (type != mem->type)
@@ -1366,9 +1371,10 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 }
 EXPORT_SYMBOL(intel_gmch_probe);
 
-struct intel_gtt *intel_gtt_get(void)
+void intel_gtt_get(size_t *gtt_total, size_t *stolen_size)
 {
-	return &intel_private.base;
+	*gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT;
+	*stolen_size = intel_private.stolen_size;
 }
 EXPORT_SYMBOL(intel_gtt_get);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dbf676d..35f171e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -408,6 +408,7 @@ struct i915_gtt {
 				   unsigned int pg_start,
 				   enum i915_cache_level cache_level);
 };
+#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
 
 #define I915_PPGTT_PD_ENTRIES 512
 #define I915_PPGTT_PT_ENTRIES 1024
@@ -698,8 +699,6 @@ struct intel_l3_parity {
 };
 
 struct i915_gem_mm {
-	/** Bridge to intel-gtt-ko */
-	struct intel_gtt *gtt;
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
 	/** Memory allocator for GTT */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bfc405a..2953ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -182,7 +182,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
 	 * entries. For aliasing ppgtt support we just steal them at the end for
 	 * now. */
-	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
+	first_pd_entry_in_global_pt =
+		gtt_total_entries(dev_priv->gtt) - I915_PPGTT_PD_ENTRIES;
 
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
@@ -473,7 +474,7 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	gtt_pte_t scratch_pte;
 	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
 	int i;
 
 	if (WARN(num_entries > max_entries,
@@ -634,7 +635,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	unsigned long gtt_size, mappable_size;
 	int ret;
 
-	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
+	gtt_size = dev_priv->gtt.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
@@ -777,7 +778,6 @@ void gen6_gmch_remove(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	iounmap(dev_priv->gtt.gsm);
 	teardown_scratch_page(dev_priv->dev);
-	kfree(dev_priv->mm.gtt);
 }
 
 static int i915_gmch_probe(struct drm_device *dev,
@@ -787,22 +787,13 @@ static int i915_gmch_probe(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	/* This is a temporary hack to make the code cleaner in
-	 * i915_gem_gtt_init. I promise it will go away very shortly. */
-	kfree(dev_priv->mm.gtt);
-
 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
 	if (!ret) {
 		DRM_ERROR("failed to set up gmch\n");
 		return -EIO;
 	}
 
-	dev_priv->mm.gtt = intel_gtt_get();
-	if (!dev_priv->mm.gtt) {
-		DRM_ERROR("Failed to initialize GTT\n");
-		intel_gmch_remove();
-		return -ENODEV;
-	}
+	intel_gtt_get(gtt_total, stolen);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
@@ -823,10 +814,6 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	unsigned long gtt_size;
 	int ret;
 
-	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
-	if (!dev_priv->mm.gtt)
-		return -ENOMEM;
-
 	gtt->mappable_base = pci_resource_start(dev->pdev, 2);
 	gtt->mappable_end = pci_resource_len(dev->pdev, 2);
 
@@ -840,13 +827,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
 				     &dev_priv->gtt.stolen_size);
-	if (ret) {
-		kfree(dev_priv->mm.gtt);
+	if (ret)
 		return ret;
-	}
-
-	dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT;
-	dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size;
 
 	gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f21ae17..69d97cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -187,11 +187,11 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	if (dev_priv->mm.stolen_base == 0)
 		return 0;
 
-	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
-		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
+	DRM_DEBUG_KMS("found %zd bytes of stolen memory at %08lx\n",
+		      dev_priv->gtt.stolen_size, dev_priv->mm.stolen_base);
 
 	/* Basic memrange allocator for stolen space */
-	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size);
 
 	return 0;
 }
@@ -205,7 +205,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
 	struct scatterlist *sg;
 
 	DRM_DEBUG_DRIVER("offset=0x%x, size=%d\n", offset, size);
-	BUG_ON(offset > dev_priv->mm.gtt->stolen_size - size);
+	BUG_ON(offset > dev_priv->gtt.stolen_size - size);
 
 	/* We hide that we have no struct page backing our stolen object
 	 * by wrapping the contiguous physical allocation with a fake
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 769b6c7..cf10555 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -3,15 +3,7 @@
 #ifndef _DRM_INTEL_GTT_H
 #define	_DRM_INTEL_GTT_H
 
-struct intel_gtt {
-	/* Size of memory reserved for graphics by the BIOS */
-	unsigned int stolen_size;
-	/* Total number of gtt entries. */
-	unsigned int gtt_total_entries;
-	/* Part of the gtt that is mappable by the cpu, for those chips where
-	 * this is not the full gtt. */
-	unsigned int gtt_mappable_entries;
-} *intel_gtt_get(void);
+void intel_gtt_get(size_t *gtt_total, size_t *stolen_size);
 
 int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 		     struct agp_bridge_data *bridge);
-- 
1.8.1.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries
  2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
                     ` (4 preceding siblings ...)
  2013-01-24 22:45   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
@ 2013-01-29  7:42   ` Damien Lespiau
  2013-01-29  9:13     ` Daniel Vetter
  5 siblings, 1 reply; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  7:42 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel GFX

On Thu, Jan 24, 2013 at 02:44:55PM -0800, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> We have a few too many differences here, so finally take the prepared
> abstraction and run with it. A few smaller changes are required to get
> things into shape:
> 
> - move i915_cache_level up since we need it in the gt funcs
> - split up i915_ggtt_clear_range and move the two functions down to
>   where the relevant insert_entries functions are
> - adjustments to a few function parameter lists
> 
> Now we have 2 functions which deal with the gen6+ global gtt
> (gen6_ggtt_ prefix) and 2 functions which deal with the legacy gtt
> code in the intel-gtt.c fake agp driver (i915_ggtt_ prefix).
> 
> Init is still a bit a mess, but honestly I don't care about that.
> 
> One thing I've thought about while deciding on the exact interfaces is
> a flag parameter for ->clear_range: We could use that to decide
> between writing invalid pte entries or scratch pte entries. In case we
> ever get around to fixing all our bugs which currently prevent us from
> filling the gtt with empty ptes for the truly unused ranges ...
> 
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> [bwidawsk: Moved functions to the gtt struct]
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/6] drm/i915: vfuncs for ppgtt
  2013-01-24 22:44   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
@ 2013-01-29  7:44     ` Damien Lespiau
  0 siblings, 0 replies; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  7:44 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel GFX

On Thu, Jan 24, 2013 at 02:44:56PM -0800, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Like for the global gtt we want a notch more flexibility here. Only
> big change (besides a few tiny function parameter adjustments) was to
> move gen6_ppgtt_insert_entries up (and remove _sg_ from its name, we
> only have one kind of insert_entries since the last gtt cleanup).
> 
> We could also extract the platform ppgtt setup/teardown code a bit
> better, but I don't care that much.
> 
> With this we have the hw details of pte writing nicely hidden away
> behind a bit of abstraction. Which should pave the way for
> different/multiple ppgtts (e.g. what we need for real ppgtt support).
> 
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/i915: pte_encode is gen6+
  2013-01-24 22:44   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
@ 2013-01-29  7:46     ` Damien Lespiau
  0 siblings, 0 replies; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  7:46 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel GFX

On Thu, Jan 24, 2013 at 02:44:57PM -0800, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> All the other gen6+ hw code has the gen6_ prefix, so be consistent
> about it.
> 
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code
  2013-01-24 21:49   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
@ 2013-01-29  7:58     ` Damien Lespiau
  0 siblings, 0 replies; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  7:58 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, GFX, Intel

On Thu, Jan 24, 2013 at 01:49:56PM -0800, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> At the moment only cosmetics, but being able to initialize/cleanup
> arbitrary ppgtt address spaces paves the way to have more than one of
> them ... Just in case we ever get around to implementing real
> per-process address spaces. Note that in that case another vfunc for
> ppgtt would be beneficial though. But that can wait until the code
> grows a second place which initializes ppgtts.
> 
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops
  2013-01-24 21:49   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
@ 2013-01-29  8:30     ` Damien Lespiau
  0 siblings, 0 replies; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  8:30 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, GFX, Intel

On Thu, Jan 24, 2013 at 01:49:57PM -0800, Ben Widawsky wrote:
> The idea, and much of the code came originally from:
> 
> commit 0712f0249c3148d8cf42a3703403c278590d4de5
> Author: Ben Widawsky <ben@bwidawsk.net>
> Date:   Fri Jan 18 17:23:16 2013 -0800
> 
>     drm/i915: Create a vtable for i915 gtt
> 
> Daniel didn't like the color of that patch series, and so I asked him to
> start something which appealed to his sense of color. The preceding
> patches are those, and now this is going on top of that.
> 
> [extracted from the original commit message]
> 
> One immediately obvious thing to implement is our gmch probing. The init
> function was getting massively bloated. Fundamentally, all that's needed
> from GMCH probing is the GTT size, and the stolen size. It makes design
> sense to put the mappable calculation in there as well, but the code
> turns out a bit nicer without it (IMO)
> 
> The intel_gtt bridge thing is still here, but the subsequent patches
> will finish ripping that out.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 6/6] drm/i915: Resume dissecting intel_gtt
  2013-01-24 22:45   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
@ 2013-01-29  8:40     ` Damien Lespiau
  2013-01-29 19:02       ` Ben Widawsky
  0 siblings, 1 reply; 26+ messages in thread
From: Damien Lespiau @ 2013-01-29  8:40 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, Intel GFX

On Thu, Jan 24, 2013 at 02:45:00PM -0800, Ben Widawsky wrote:
> With the probe call in our dispatch table, we can now cut away two of
> the remaining members in the intel_gtt shared struct.
> 
> v2: Rebased on top of Daniel's series
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Shouldn't the commit message be that you get rid of struct intel_gtt
entirely?

Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries
  2013-01-29  7:42   ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Damien Lespiau
@ 2013-01-29  9:13     ` Daniel Vetter
  0 siblings, 0 replies; 26+ messages in thread
From: Daniel Vetter @ 2013-01-29  9:13 UTC (permalink / raw)
  To: Damien Lespiau; +Cc: Daniel Vetter, Ben Widawsky, Intel GFX

On Tue, Jan 29, 2013 at 07:42:57AM +0000, Damien Lespiau wrote:
> On Thu, Jan 24, 2013 at 02:44:55PM -0800, Ben Widawsky wrote:
> > From: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > We have a few too many differences here, so finally take the prepared
> > abstraction and run with it. A few smaller changes are required to get
> > things into shape:
> > 
> > - move i915_cache_level up since we need it in the gt funcs
> > - split up i915_ggtt_clear_range and move the two functions down to
> >   where the relevant insert_entries functions are
> > - adjustments to a few function parameter lists
> > 
> > Now we have 2 functions which deal with the gen6+ global gtt
> > (gen6_ggtt_ prefix) and 2 functions which deal with the legacy gtt
> > code in the intel-gtt.c fake agp driver (i915_ggtt_ prefix).
> > 
> > Init is still a bit a mess, but honestly I don't care about that.
> > 
> > One thing I've thought about while deciding on the exact interfaces is
> > a flag parameter for ->clear_range: We could use that to decide
> > between writing invalid pte entries or scratch pte entries. In case we
> > ever get around to fixing all our bugs which currently prevent us from
> > filling the gtt with empty ptes for the truly unused ranges ...
> > 
> > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > [bwidawsk: Moved functions to the gtt struct]
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

Merged the series into dinq, thanks for the review.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 6/6] drm/i915: Resume dissecting intel_gtt
  2013-01-29  8:40     ` Damien Lespiau
@ 2013-01-29 19:02       ` Ben Widawsky
  0 siblings, 0 replies; 26+ messages in thread
From: Ben Widawsky @ 2013-01-29 19:02 UTC (permalink / raw)
  To: Damien Lespiau; +Cc: Daniel Vetter, Intel GFX

On Tue, Jan 29, 2013 at 08:40:06AM +0000, Damien Lespiau wrote:
> On Thu, Jan 24, 2013 at 02:45:00PM -0800, Ben Widawsky wrote:
> > With the probe call in our dispatch table, we can now cut away two of
> > the remaining members in the intel_gtt shared struct.
> > 
> > v2: Rebased on top of Daniel's series
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Shouldn't the commit message be that you get rid of struct intel_gtt
> entirely?

Yes, I corrected this info on IRC a few days ago. Daniel, please fix
that up if/when you take the patch.

> 
> Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
> 
> -- 
> Damien

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2013-01-29 19:00 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-24 15:50 [PATCH 0/4] gtt abstractions Daniel Vetter
2013-01-24 15:50 ` [PATCH 1/4] drm/i915: vfuncs for gtt_clear_range/insert_entries Daniel Vetter
2013-01-24 15:50 ` [PATCH 2/4] drm/i915: vfuncs for ppgtt Daniel Vetter
2013-01-24 15:50 ` [PATCH 3/4] drm/i915: pte_encode is gen6+ Daniel Vetter
2013-01-24 15:50 ` [PATCH 4/4] drm/i915: extract hw ppgtt setup/cleanup code Daniel Vetter
2013-01-24 19:41 ` [PATCH 0/4] gtt abstractions Ben Widawsky
2013-01-24 21:49 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
2013-01-24 21:49   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
2013-01-24 21:49   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
2013-01-24 21:49   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
2013-01-29  7:58     ` Damien Lespiau
2013-01-24 21:49   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
2013-01-29  8:30     ` Damien Lespiau
2013-01-24 21:49   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
2013-01-24 22:44 ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Ben Widawsky
2013-01-24 22:44   ` [PATCH 2/6] drm/i915: vfuncs for ppgtt Ben Widawsky
2013-01-29  7:44     ` Damien Lespiau
2013-01-24 22:44   ` [PATCH 3/6] drm/i915: pte_encode is gen6+ Ben Widawsky
2013-01-29  7:46     ` Damien Lespiau
2013-01-24 22:44   ` [PATCH 4/6] drm/i915: extract hw ppgtt setup/cleanup code Ben Widawsky
2013-01-24 22:44   ` [PATCH 5/6] drm/i915: Add probe and remove to the gtt ops Ben Widawsky
2013-01-24 22:45   ` [PATCH 6/6] drm/i915: Resume dissecting intel_gtt Ben Widawsky
2013-01-29  8:40     ` Damien Lespiau
2013-01-29 19:02       ` Ben Widawsky
2013-01-29  7:42   ` [PATCH 1/6] drm/i915: vfuncs for gtt_clear_range/insert_entries Damien Lespiau
2013-01-29  9:13     ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).