All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/18] drm: Introduce drm_mm_create_block()
@ 2012-10-19 17:03 Chris Wilson
  2012-10-19 17:03 ` [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
                   ` (17 more replies)
  0 siblings, 18 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, Dave Airlie

To be used later by i915 to preallocate exact blocks of space from the
range manager.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@redhat.com>
Cc: dri-devel@lists.freedestkop.org
---
 drivers/gpu/drm/drm_mm.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++
 include/drm/drm_mm.h     |    4 ++++
 2 files changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 9bb82f7..5db8c20 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	}
 }
 
+struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
+					unsigned long start,
+					unsigned long size,
+					bool atomic)
+{
+	struct drm_mm_node *hole, *node;
+	unsigned long end = start + size;
+
+	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+		unsigned long hole_start;
+		unsigned long hole_end;
+
+		BUG_ON(!hole->hole_follows);
+		hole_start = drm_mm_hole_node_start(hole);
+		hole_end = drm_mm_hole_node_end(hole);
+
+		if (hole_start > start || hole_end < end)
+			continue;
+
+		node = drm_mm_kmalloc(mm, atomic);
+		if (unlikely(node == NULL))
+			return NULL;
+
+		node->start = start;
+		node->size = size;
+		node->mm = mm;
+		node->allocated = 1;
+
+		INIT_LIST_HEAD(&node->hole_stack);
+		list_add(&node->node_list, &hole->node_list);
+
+		if (start == hole_start) {
+			hole->hole_follows = 0;
+			list_del_init(&hole->hole_stack);
+		}
+
+		node->hole_follows = 0;
+		if (end != hole_end) {
+			list_add(&node->hole_stack, &mm->hole_stack);
+			node->hole_follows = 1;
+		}
+
+		return node;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_mm_create_block);
+
 struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *hole_node,
 					     unsigned long size,
 					     unsigned alignment,
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 06d7f79..4020f96 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -102,6 +102,10 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 /*
  * Basic range manager support (drm_mm.c)
  */
+extern struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
+					       unsigned long start,
+					       unsigned long size,
+					       bool atomic);
 extern struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
 						    unsigned long size,
 						    unsigned alignment,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-01 23:51   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
                   ` (16 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

It was not until the G33 refresh, that a PCI config register was
introduced that explicitly said where the stolen memory was. Prior to
865G there was not even a register that said where the end of usable
low memory was and where the stolen memory began (or ended depending
upon chipset). Before then, one has to look at the BIOS memory maps to
find the Top of Memory. Alas that is not exported by arch/x86 and so we
have to resort to disabling stolen memory on gen2 for the time being.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |   69 ++++++++++++++------------------
 2 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4728d30..687f379 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -705,6 +705,7 @@ typedef struct drm_i915_private {
 		unsigned long gtt_start;
 		unsigned long gtt_mappable_end;
 		unsigned long gtt_end;
+		unsigned long stolen_base; /* limited to low memory (32-bit) */
 
 		struct io_mapping *gtt_mapping;
 		phys_addr_t gtt_base_addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index ada2e90..a01ff74 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -43,56 +43,43 @@
  * for is a boon.
  */
 
-#define PTE_ADDRESS_MASK		0xfffff000
-#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
-#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
-#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
-#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
-#define PTE_MAPPING_TYPE_MASK		(3 << 1)
-#define PTE_VALID			(1 << 0)
-
-/**
- * i915_stolen_to_phys - take an offset into stolen memory and turn it into
- *                       a physical one
- * @dev: drm device
- * @offset: address to translate
- *
- * Some chip functions require allocations from stolen space and need the
- * physical address of the memory in question.
- */
-static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
+static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pdev = dev_priv->bridge_dev;
 	u32 base;
 
-#if 0
 	/* On the machines I have tested the Graphics Base of Stolen Memory
-	 * is unreliable, so compute the base by subtracting the stolen memory
-	 * from the Top of Low Usable DRAM which is where the BIOS places
-	 * the graphics stolen memory.
+	 * is unreliable, so on those compute the base by subtracting the
+	 * stolen memory from the Top of Low Usable DRAM which is where the
+	 * BIOS places the graphics stolen memory.
+	 *
+	 * On gen2, the layout is slightly different with the Graphics Segment
+	 * immediately following Top of Memory (or Top of Usable DRAM). Note
+	 * it appears that TOUD is only reported by 865g, so we just use the
+	 * top of memory as determined by the e820 probe.
+	 *
+	 * XXX gen2 requires an unavailable symbol and 945gm fails with
+	 * its value of TOLUD.
 	 */
+	base = 0;
 	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		/* top 32bits are reserved = 0 */
+		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(pdev, 0xA4, &base);
-	} else {
-		/* XXX presume 8xx is the same as i915 */
-		pci_bus_read_config_dword(pdev->bus, 2, 0x5C, &base);
-	}
-#else
-	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		u16 val;
-		pci_read_config_word(pdev, 0xb0, &val);
-		base = val >> 4 << 20;
-	} else {
+#if 0
+	} else if (IS_GEN3(dev)) {
 		u8 val;
+		/* Stolen is immediately below Top of Low Usable DRAM */
 		pci_read_config_byte(pdev, 0x9c, &val);
 		base = val >> 3 << 27;
-	}
-	base -= dev_priv->mm.gtt->stolen_size;
+		base -= dev_priv->mm.gtt->stolen_size;
+	} else {
+		/* Stolen is immediately above Top of Memory */
+		base = max_low_pfn_mapped << PAGE_SHIFT;
 #endif
+	}
 
-	return base + offset;
+	return base;
 }
 
 static void i915_warn_stolen(struct drm_device *dev)
@@ -117,7 +104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	if (!compressed_fb)
 		goto err;
 
-	cfb_base = i915_stolen_to_phys(dev, compressed_fb->start);
+	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
 	if (!cfb_base)
 		goto err_fb;
 
@@ -130,7 +117,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 		if (!compressed_llb)
 			goto err_fb;
 
-		ll_base = i915_stolen_to_phys(dev, compressed_llb->start);
+		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
 		if (!ll_base)
 			goto err_llb;
 	}
@@ -149,7 +136,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	}
 
 	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
-		      cfb_base, ll_base, size >> 20);
+		      (long)cfb_base, (long)ll_base, size >> 20);
 	return;
 
 err_llb:
@@ -181,6 +168,10 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
 
+	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
+	if (dev_priv->mm.stolen_base == 0)
+		return 0;
+
 	/* Basic memrange allocator for stolen space */
 	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
  2012-10-19 17:03 ` [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-26 21:58   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
                   ` (15 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

A few of the earlier registers where enlarged and so the Base Data of
Stolen Memory Register (BDSM) was pushed to 0xb0.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_gem_stolen.c |   12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index a01ff74..d023ed6 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -63,7 +63,14 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 	 * its value of TOLUD.
 	 */
 	base = 0;
-	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
+	if (INTEL_INFO(dev)->gen >= 6) {
+		/* Read Base Data of Stolen Memory Register (BDSM) directly.
+		 * Note that there is also a MCHBAR miror at 0x1080c0 or
+		 * we could use device 2:0x5c instead.
+		*/
+		pci_read_config_dword(pdev, 0xB0, &base);
+		base &= ~4095; /* lower bits used for locking register */
+	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
 		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(pdev, 0xA4, &base);
 #if 0
@@ -172,6 +179,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	if (dev_priv->mm.stolen_base == 0)
 		return 0;
 
+	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
+		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
+
 	/* Basic memrange allocator for stolen space */
 	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
  2012-10-19 17:03 ` [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
  2012-10-19 17:03 ` [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-26 22:22   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
                   ` (14 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_drv.h     |    2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c |   35 ++++++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 687f379..589b2f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -908,6 +908,8 @@ enum i915_cache_level {
 	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
 };
 
+#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
+
 struct drm_i915_gem_object_ops {
 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 47e427e..d9d3fc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -378,18 +378,47 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 			      unsigned long end)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_mm_node *entry;
+	struct drm_i915_gem_object *obj;
 
-	/* Substract the guard page ... */
+	/* Subtract the guard page ... */
 	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
 	if (!HAS_LLC(dev))
 		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
 
+	/* Mark any preallocated objects as occupied */
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
+		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
+			      obj->gtt_offset, obj->base.size);
+
+		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
+		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
+						     obj->gtt_offset,
+						     obj->base.size,
+						     false);
+		obj->has_global_gtt_mapping = 1;
+	}
+
 	dev_priv->mm.gtt_start = start;
 	dev_priv->mm.gtt_mappable_end = mappable_end;
 	dev_priv->mm.gtt_end = end;
 	dev_priv->mm.gtt_total = end - start;
 	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
 
-	/* ... but ensure that we clear the entire range. */
-	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+	/* Clear any non-preallocated blocks */
+	list_for_each_entry(entry, &dev_priv->mm.gtt_space.hole_stack, hole_stack) {
+		unsigned long hole_start = entry->start + entry->size;
+		unsigned long hole_end = list_entry(entry->node_list.next,
+						    struct drm_mm_node,
+						    node_list)->start;
+
+		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+			      hole_start, hole_end);
+
+		intel_gtt_clear_range(hole_start / PAGE_SIZE,
+				      (hole_end-hole_start) / PAGE_SIZE);
+	}
+
+	/* And finally clear the reserved guard page */
+	intel_gtt_clear_range(end / PAGE_SIZE - 1, 1);
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (2 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 13:41   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Dave Airlie

This will be used i915 in forthcoming patches in order to measure the
largest contiguous chunk of memory available for enabling chipset
features.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c |   55 +++++++++++++++-------------------------------
 include/drm/drm_mm.h     |   26 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 5db8c20..c3d11ec 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -102,20 +102,6 @@ int drm_mm_pre_get(struct drm_mm *mm)
 }
 EXPORT_SYMBOL(drm_mm_pre_get);
 
-static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
-{
-	return hole_node->start + hole_node->size;
-}
-
-static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
-{
-	struct drm_mm_node *next_node =
-		list_entry(hole_node->node_list.next, struct drm_mm_node,
-			   node_list);
-
-	return next_node->start;
-}
-
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
 				 unsigned long size, unsigned alignment,
@@ -127,7 +113,7 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	unsigned long adj_start = hole_start;
 	unsigned long adj_end = hole_end;
 
-	BUG_ON(!hole_node->hole_follows || node->allocated);
+	BUG_ON(node->allocated);
 
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
@@ -168,15 +154,10 @@ struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
 {
 	struct drm_mm_node *hole, *node;
 	unsigned long end = start + size;
+	unsigned long hole_start;
+	unsigned long hole_end;
 
-	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
-		unsigned long hole_start;
-		unsigned long hole_end;
-
-		BUG_ON(!hole->hole_follows);
-		hole_start = drm_mm_hole_node_start(hole);
-		hole_end = drm_mm_hole_node_end(hole);
-
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
 		if (hole_start > start || hole_end < end)
 			continue;
 
@@ -361,8 +342,10 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 				== drm_mm_hole_node_end(node));
 		list_del(&node->hole_stack);
 	} else
-		BUG_ON(drm_mm_hole_node_start(node)
-				!= drm_mm_hole_node_end(node));
+		BUG_ON(node->start + node->size !=
+		       list_entry(node->node_list.next,
+				  struct drm_mm_node, node_list)->start);
+
 
 	if (!prev_node->hole_follows) {
 		prev_node->hole_follows = 1;
@@ -420,6 +403,8 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
+	unsigned long adj_start;
+	unsigned long adj_end;
 	unsigned long best_size;
 
 	BUG_ON(mm->scanned_blocks);
@@ -427,17 +412,13 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
-		unsigned long adj_start = drm_mm_hole_node_start(entry);
-		unsigned long adj_end = drm_mm_hole_node_end(entry);
-
+	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
 			if (adj_end <= adj_start)
 				continue;
 		}
 
-		BUG_ON(!entry->hole_follows);
 		if (!check_free_hole(adj_start, adj_end, size, alignment))
 			continue;
 
@@ -464,6 +445,8 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
+	unsigned long adj_start;
+	unsigned long adj_end;
 	unsigned long best_size;
 
 	BUG_ON(mm->scanned_blocks);
@@ -471,13 +454,11 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
-		unsigned long adj_start = drm_mm_hole_node_start(entry) < start ?
-			start : drm_mm_hole_node_start(entry);
-		unsigned long adj_end = drm_mm_hole_node_end(entry) > end ?
-			end : drm_mm_hole_node_end(entry);
-
-		BUG_ON(!entry->hole_follows);
+	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+		if (adj_start < start)
+			adj_start = start;
+		if (adj_end > end)
+			adj_end = end;
 
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 4020f96..d710a10 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -89,6 +89,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 {
 	return mm->hole_stack.next;
 }
+
+static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
+{
+	BUG_ON(!hole_node->hole_follows);
+	return hole_node->start + hole_node->size;
+}
+
+static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
+{
+	return list_entry(hole_node->node_list.next,
+			  struct drm_mm_node, node_list)->start;
+}
+
 #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \
 						&(mm)->head_node.node_list, \
 						node_list)
@@ -99,6 +112,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 	     entry != NULL; entry = next, \
 		next = entry ? list_entry(entry->node_list.next, \
 			struct drm_mm_node, node_list) : NULL) \
+
+/* Note that we need to unroll list_for_each_entry in order to inline
+ * setting hole_start and hole_end on each iteration and keep the
+ * macro sane.
+ */
+#define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
+	for (entry = list_entry((mm)->hole_stack.next, typeof(struct drm_mm_node), hole_stack); \
+	     &entry->hole_stack != &(mm)->hole_stack ? \
+	     hole_start = drm_mm_hole_node_start(entry), \
+	     hole_end = drm_mm_hole_node_end(entry) : \
+	     0; \
+	     entry = list_entry(entry->hole_stack.next, typeof(struct drm_mm_node), hole_stack))
+
 /*
  * Basic range manager support (drm_mm.c)
  */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (3 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 13:44   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use Chris Wilson
                   ` (12 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

As we may wish to wrap regions preallocated by the BIOS, we need to do
that before carving out contiguous chunks of stolen space for FBC.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_drv.h        |    2 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |  115 ++++++++++++++++----------------
 drivers/gpu/drm/i915/intel_display.c   |    3 +
 drivers/gpu/drm/i915/intel_pm.c        |    3 +-
 4 files changed, 66 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 589b2f3..997abb3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1516,6 +1516,8 @@ int i915_gem_evict_everything(struct drm_device *dev);
 
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
+int i915_gem_stolen_setup_compression(struct drm_device *dev);
+void i915_gem_stolen_cleanup_compression(struct drm_device *dev);
 void i915_gem_cleanup_stolen(struct drm_device *dev);
 
 /* i915_gem_tiling.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index d023ed6..68ef22a 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -89,21 +89,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 	return base;
 }
 
-static void i915_warn_stolen(struct drm_device *dev)
-{
-	DRM_INFO("not enough stolen space for compressed buffer, disabling\n");
-	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
-}
-
-static void i915_setup_compression(struct drm_device *dev, int size)
+static int i915_setup_compression(struct drm_device *dev, int size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
-	unsigned long cfb_base;
-	unsigned long ll_base = 0;
 
-	/* Just in case the BIOS is doing something questionable. */
-	intel_disable_fbc(dev);
+	DRM_DEBUG_KMS("reserving %d bytes of contiguous stolen space for FBC\n",
+		      size);
 
 	compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
 	if (compressed_fb)
@@ -111,11 +103,11 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	if (!compressed_fb)
 		goto err;
 
-	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
-	if (!cfb_base)
-		goto err_fb;
-
-	if (!(IS_GM45(dev) || HAS_PCH_SPLIT(dev))) {
+	if (HAS_PCH_SPLIT(dev))
+		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
+	else if (IS_GM45(dev)) {
+		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+	} else {
 		compressed_llb = drm_mm_search_free(&dev_priv->mm.stolen,
 						    4096, 4096, 0);
 		if (compressed_llb)
@@ -124,56 +116,81 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 		if (!compressed_llb)
 			goto err_fb;
 
-		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
-		if (!ll_base)
-			goto err_llb;
-	}
+		dev_priv->compressed_llb = compressed_llb;
 
-	dev_priv->cfb_size = size;
+		I915_WRITE(FBC_CFB_BASE,
+			   dev_priv->mm.stolen_base + compressed_fb->start);
+		I915_WRITE(FBC_LL_BASE,
+			   dev_priv->mm.stolen_base + compressed_llb->start);
+	}
 
 	dev_priv->compressed_fb = compressed_fb;
-	if (HAS_PCH_SPLIT(dev))
-		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
-	else if (IS_GM45(dev)) {
-		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
-	} else {
-		I915_WRITE(FBC_CFB_BASE, cfb_base);
-		I915_WRITE(FBC_LL_BASE, ll_base);
-		dev_priv->compressed_llb = compressed_llb;
-	}
+	dev_priv->cfb_size = size;
 
-	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
-		      (long)cfb_base, (long)ll_base, size >> 20);
-	return;
+	return size;
 
-err_llb:
-	drm_mm_put_block(compressed_llb);
 err_fb:
 	drm_mm_put_block(compressed_fb);
 err:
 	dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
-	i915_warn_stolen(dev);
+	DRM_INFO("not enough stolen space for compressed buffer (need %d bytes), disabling\n", size);
+	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+	return 0;
+}
+
+int i915_gem_stolen_setup_compression(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node *node;
+	unsigned long hole_start, hole_end, size;
+
+	if (dev_priv->mm.stolen_base == 0)
+		return 0;
+
+	if (dev_priv->cfb_size)
+		return dev_priv->cfb_size;
+
+	/* Try to set up FBC with a reasonable compressed buffer size */
+	size = 0;
+	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
+		unsigned long hole_size = hole_end - hole_start;
+		if (hole_size > size)
+			size = hole_size;
+	}
+
+	/* Try to get a 32M buffer... */
+	if (size > (36*1024*1024))
+		size = 32*1024*1024;
+	else /* fall back to 3/4 of the stolen space */
+		size = size * 3 / 4;
+
+	return i915_setup_compression(dev, size);
 }
 
-static void i915_cleanup_compression(struct drm_device *dev)
+void i915_gem_stolen_cleanup_compression(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	drm_mm_put_block(dev_priv->compressed_fb);
+	if (dev_priv->cfb_size == 0)
+		return;
+
+	if (dev_priv->compressed_fb)
+		drm_mm_put_block(dev_priv->compressed_fb);
+
 	if (dev_priv->compressed_llb)
 		drm_mm_put_block(dev_priv->compressed_llb);
+
+	dev_priv->cfb_size = 0;
 }
 
 void i915_gem_cleanup_stolen(struct drm_device *dev)
 {
-	if (I915_HAS_FBC(dev) && i915_powersave)
-		i915_cleanup_compression(dev);
+	i915_gem_stolen_cleanup_compression(dev);
 }
 
 int i915_gem_init_stolen(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
 
 	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
 	if (dev_priv->mm.stolen_base == 0)
@@ -183,21 +200,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
 		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
 
 	/* Basic memrange allocator for stolen space */
-	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
-
-	/* Try to set up FBC with a reasonable compressed buffer size */
-	if (I915_HAS_FBC(dev) && i915_powersave) {
-		int cfb_size;
-
-		/* Leave 1M for line length buffer & misc. */
-
-		/* Try to get a 32M buffer... */
-		if (prealloc_size > (36*1024*1024))
-			cfb_size = 32*1024*1024;
-		else /* fall back to 7/8 of the stolen space */
-			cfb_size = prealloc_size * 7 / 8;
-		i915_setup_compression(dev, cfb_size);
-	}
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c2c219b..2ef497d0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8351,6 +8351,9 @@ void intel_modeset_init(struct drm_device *dev)
 	/* Just disable it once at startup */
 	i915_disable_vga(dev);
 	intel_setup_outputs(dev);
+
+	/* Just in case the BIOS is doing something questionable. */
+	intel_disable_fbc(dev);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2b3cddf..9ee53cb 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -440,7 +440,7 @@ void intel_update_fbc(struct drm_device *dev)
 		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
 		goto out_disable;
 	}
-	if (intel_fb->obj->base.size > dev_priv->cfb_size) {
+	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
 		DRM_DEBUG_KMS("framebuffer too large, disabling "
 			      "compression\n");
 		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
@@ -526,6 +526,7 @@ out_disable:
 		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
 		intel_disable_fbc(dev);
 	}
+	i915_gem_stolen_cleanup_compression(dev);
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (4 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 15:00   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 08/18] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
                   ` (11 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

As FBC is commonly disabled due to limitations of the chipset upon
output configurations, on many systems FBC is never enabled. For those
systems, it is advantageous to make use of the stolen memory for other
objects and so we defer allocation of the FBC chunk until we actually
require it. This increases the likelihood of that allocation failing,
which in turns means that we are already taking advantage of the stolen
memory!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9ee53cb..cbf3f38 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -440,12 +440,6 @@ void intel_update_fbc(struct drm_device *dev)
 		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
 		goto out_disable;
 	}
-	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
-		DRM_DEBUG_KMS("framebuffer too large, disabling "
-			      "compression\n");
-		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
-		goto out_disable;
-	}
 	if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
 		DRM_DEBUG_KMS("mode incompatible with compression, "
@@ -479,6 +473,13 @@ void intel_update_fbc(struct drm_device *dev)
 	if (in_dbg_master())
 		goto out_disable;
 
+	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
+		DRM_DEBUG_KMS("framebuffer too large, disabling "
+			      "compression\n");
+		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
+		goto out_disable;
+	}
+
 	/* If the scanout has not changed, don't modify the FBC settings.
 	 * Note that we make the fundamental assumption that the fb->obj
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 08/18] drm/i915: Allow objects to be created with no backing pages, but stolen space
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (5 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 09/18] drm/i915: Differentiate between prime and stolen objects Chris Wilson
                   ` (10 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

In order to accommodate objects that are not backed by struct pages, but
instead point into a contiguous region of stolen space, we need to make
various changes to avoid dereferencing obj->pages or obj->base.filp.

First introduce a marker for the stolen object, that specifies its
offset into the stolen region and implies that it has no backing pages.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    2 ++
 drivers/gpu/drm/i915/i915_drv.h     |    2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c5710f..c0d1029 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -125,6 +125,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->gtt_space != NULL)
 		seq_printf(m, " (gtt offset: %08x, size: %08x)",
 			   obj->gtt_offset, (unsigned int)obj->gtt_space->size);
+	if (obj->stolen)
+		seq_printf(m, " (stolen: %08lx)", obj->stolen->start);
 	if (obj->pin_mappable || obj->fault_mappable) {
 		char s[3], *t = s;
 		if (obj->pin_mappable)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 997abb3..055bd08 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -935,6 +935,8 @@ struct drm_i915_gem_object {
 
 	/** Current space allocated to this object in the GTT, if any. */
 	struct drm_mm_node *gtt_space;
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
 	struct list_head gtt_list;
 
 	/** This object's place on the active/inactive lists */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 09/18] drm/i915: Differentiate between prime and stolen objects
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (6 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 08/18] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 10/18] drm/i915: Support readback of stolen objects upon error Chris Wilson
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Stolen objects also share the property that they have no backing shmemfs
filp, but they can be used with pwrite/pread/gtt-mapping.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_drv.h |    5 +++++
 drivers/gpu/drm/i915/i915_gem.c |    4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 055bd08..6b893c7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1071,6 +1071,11 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
+{
+	return obj->base.import_attach != NULL;
+}
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eb3316b..29402e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -553,7 +553,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	/* prime objects have no backing filp to GEM pread/pwrite
 	 * pages from.
 	 */
-	if (!obj->base.filp) {
+	if (i915_gem_object_is_prime(obj)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -902,7 +902,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	/* prime objects have no backing filp to GEM pread/pwrite
 	 * pages from.
 	 */
-	if (!obj->base.filp) {
+	if (i915_gem_object_is_prime(obj)) {
 		ret = -EINVAL;
 		goto out;
 	}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 10/18] drm/i915: Support readback of stolen objects upon error
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (7 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 09/18] drm/i915: Differentiate between prime and stolen objects Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 15:41   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 11/18] drm/i915: Handle stolen objects in pwrite Chris Wilson
                   ` (8 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_irq.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d07c787..df3c10a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -924,6 +924,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 						     reloc_offset);
 			memcpy_fromio(d, s, PAGE_SIZE);
 			io_mapping_unmap_atomic(s);
+		} else if (src->stolen) {
+			unsigned long offset;
+
+			offset = dev_priv->mm.stolen_base;
+			offset += src->stolen->start;
+			offset += i << PAGE_SHIFT;
+
+			memcpy_fromio(d, (void *)offset, PAGE_SIZE);
 		} else {
 			struct page *page;
 			void *s;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 11/18] drm/i915: Handle stolen objects in pwrite
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (8 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 10/18] drm/i915: Support readback of stolen objects upon error Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 12/18] drm/i915: Handle stolen objects for pread Chris Wilson
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  159 ++++++++++++++++++++++++---------------
 1 file changed, 100 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 29402e3..2108761 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -664,19 +664,17 @@ out:
  * needs_clflush_before is set and flushes out any written cachelines after
  * writing if needs_clflush is set. */
 static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
 	if (unlikely(page_do_bit17_swizzling))
 		return -EINVAL;
 
-	vaddr = kmap_atomic(page);
 	if (needs_clflush_before)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
@@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 	if (needs_clflush_after)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
-	kunmap_atomic(vaddr);
 
 	return ret ? -EFAULT : 0;
 }
@@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
@@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
 					     page_do_bit17_swizzling);
-	kunmap(page);
 
 	return ret ? -EFAULT : 0;
 }
@@ -731,10 +725,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		      struct drm_i915_gem_pwrite *args,
 		      struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	int shmem_page_offset, page_length, ret = 0;
+	int page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
@@ -770,74 +765,120 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
-		int partial_cacheline_write;
+	if (obj->stolen) {
+		char *vaddr;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+		vaddr = (char *)dev_priv->mm.stolen_base;
+		vaddr += obj->stolen->start + offset;
 
-		if (remain <= 0)
-			break;
+		offset = offset_in_page(offset);
+		while (remain > 0) {
+			int partial_cacheline_write;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
+			page_length = remain;
+			if ((offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - offset;
 
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire patch. */
+			partial_cacheline_write = needs_clflush_before &&
+				((offset | page_length) & (boot_cpu_data.x86_clflush_size - 1));
 
-		/* If we don't overwrite a cacheline completely we need to be
-		 * careful to have up-to-date data by first clflushing. Don't
-		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
-			((shmem_page_offset | page_length)
-				& (boot_cpu_data.x86_clflush_size - 1));
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				((uintptr_t)vaddr & (1 << 17)) != 0;
 
-		page = sg_page(sg);
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
+			ret = shmem_pwrite_fast(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
-		if (ret == 0)
-			goto next_page;
+			if (ret == 0)
+				goto next_stolen;
 
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
-		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
 
-		mutex_lock(&dev->struct_mutex);
+			ret = shmem_pwrite_slow(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-next_page:
-		set_page_dirty(page);
-		mark_page_accessed(page);
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out;
 
-		if (ret)
-			goto out;
+next_stolen:
+			remain -= page_length;
+			user_data += page_length;
+			offset = 0;
+		}
+	} else {
+		i915_gem_object_pin_pages(obj);
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+		offset = offset_in_page(offset);
+		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+			struct page *page;
+			char *vaddr;
+			int partial_cacheline_write;
+
+			if (i < args->offset >> PAGE_SHIFT)
+				continue;
+
+			if (remain <= 0)
+				break;
+
+			page_length = remain;
+			if ((offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - offset;
+
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire patch. */
+			partial_cacheline_write = needs_clflush_before &&
+				((offset | page_length) & (boot_cpu_data.x86_clflush_size - 1));
+
+			page = sg_page(sg);
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				(page_to_phys(page) & (1 << 17)) != 0;
+
+			vaddr = kmap_atomic(page);
+			ret = shmem_pwrite_fast(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+			kunmap_atomic(vaddr);
+
+			if (ret == 0)
+				goto next_page;
+
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
+
+			vaddr = kmap(page);
+			ret = shmem_pwrite_slow(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+			kunmap(page);
+
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out_unpin;
+
+next_page:
+			remain -= page_length;
+			user_data += page_length;
+			offset = 0;
+		}
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
-
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 12/18] drm/i915: Handle stolen objects for pread
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (9 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 11/18] drm/i915: Handle stolen objects in pwrite Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  175 ++++++++++++++++++++++++++-------------
 1 file changed, 116 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2108761..d75c5c3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -324,24 +324,21 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * Flushes invalid cachelines before reading the target if
  * needs_clflush is set. */
 static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_fast(char *vaddr, int shmem_page_offset, int page_length,
 		 char __user *user_data,
 		 bool page_do_bit17_swizzling, bool needs_clflush)
 {
-	char *vaddr;
 	int ret;
 
 	if (unlikely(page_do_bit17_swizzling))
 		return -EINVAL;
 
-	vaddr = kmap_atomic(page);
 	if (needs_clflush)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
 	ret = __copy_to_user_inatomic(user_data,
 				      vaddr + shmem_page_offset,
 				      page_length);
-	kunmap_atomic(vaddr);
 
 	return ret ? -EFAULT : 0;
 }
@@ -371,14 +368,12 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(char *vaddr, int shmem_page_offset, int page_length,
 		 char __user *user_data,
 		 bool page_do_bit17_swizzling, bool needs_clflush)
 {
-	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
 	if (needs_clflush)
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
@@ -392,7 +387,6 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 		ret = __copy_to_user(user_data,
 				     vaddr + shmem_page_offset,
 				     page_length);
-	kunmap(page);
 
 	return ret ? - EFAULT : 0;
 }
@@ -403,6 +397,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_pread *args,
 		     struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	char __user *user_data;
 	ssize_t remain;
 	loff_t offset;
@@ -433,76 +428,138 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		}
 	}
 
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
+	offset = args->offset;
 
-	i915_gem_object_pin_pages(obj);
+	if (obj->stolen) {
+		char *vaddr;
 
-	offset = args->offset;
+		vaddr = (char *)dev_priv->mm.stolen_base;
+		vaddr += obj->stolen->start + offset;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
+		shmem_page_offset = offset_in_page(offset);
+		while (remain > 0) {
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				((uintptr_t)vaddr & (1 << 17)) != 0;
 
-		if (remain <= 0)
-			break;
+			ret = shmem_pread_fast(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			if (ret == 0)
+				goto next_stolen;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
 
-		page = sg_page(sg);
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
+			if (!prefaulted) {
+				ret = fault_in_multipages_writeable(user_data, remain);
+				/* Userspace is tricking us, but we've already clobbered
+				 * its pages with the prefault and promised to write the
+				 * data up to the first fault. Hence ignore any errors
+				 * and just continue. */
+				(void)ret;
+				prefaulted = 1;
+			}
 
-		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
-		if (ret == 0)
-			goto next_page;
+			ret = shmem_pread_slow(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
 
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out;
 
-		if (!prefaulted) {
-			ret = fault_in_multipages_writeable(user_data, remain);
-			/* Userspace is tricking us, but we've already clobbered
-			 * its pages with the prefault and promised to write the
-			 * data up to the first fault. Hence ignore any errors
-			 * and just continue. */
-			(void)ret;
-			prefaulted = 1;
+next_stolen:
+			remain -= page_length;
+			user_data += page_length;
+			vaddr += page_length;
+			shmem_page_offset = 0;
 		}
+	} else {
+		ret = i915_gem_object_get_pages(obj);
+		if (ret)
+			return ret;
 
-		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
+		i915_gem_object_pin_pages(obj);
 
-		mutex_lock(&dev->struct_mutex);
+		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+			char *vaddr;
+			struct page *page;
 
-next_page:
-		mark_page_accessed(page);
+			if (i < offset >> PAGE_SHIFT)
+				continue;
 
-		if (ret)
-			goto out;
+			if (remain <= 0)
+				break;
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			shmem_page_offset = offset_in_page(offset);
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
+
+			page = sg_page(sg);
+			mark_page_accessed(page);
+
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				(page_to_phys(page) & (1 << 17)) != 0;
+
+			vaddr = kmap_atomic(page);
+			ret = shmem_pread_fast(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			kunmap_atomic(vaddr);
+
+			if (ret == 0)
+				goto next_page;
+
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
+
+			if (!prefaulted) {
+				ret = fault_in_multipages_writeable(user_data, remain);
+				/* Userspace is tricking us, but we've already clobbered
+				 * its pages with the prefault and promised to write the
+				 * data up to the first fault. Hence ignore any errors
+				 * and just continue. */
+				(void)ret;
+				prefaulted = 1;
+			}
+
+			vaddr = kmap(page);
+			ret = shmem_pread_slow(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			kunmap(page);
+
+			mutex_lock(&dev->struct_mutex);
+
+			if (ret)
+				goto out_unpin;
+
+next_page:
+			remain -= page_length;
+			user_data += page_length;
+			offset += page_length;
+		}
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
 	}
 
-out:
-	i915_gem_object_unpin_pages(obj);
 
+out:
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (10 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 12/18] drm/i915: Handle stolen objects for pread Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 16:32   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 14/18] drm/i915: Allocate fbcon from stolen memory Chris Wilson
                   ` (5 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Allow for the creation of GEM objects backed by stolen memory. As these
are not backed by ordinary pages, we create a fake dma mapping and store
the address in the scatterlist rather than obj->pages.

v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse
Barnes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_drv.h        |    3 +
 drivers/gpu/drm/i915/i915_gem.c        |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |  122 ++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6b893c7..5a0e0cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1526,6 +1526,9 @@ int i915_gem_init_stolen(struct drm_device *dev);
 int i915_gem_stolen_setup_compression(struct drm_device *dev);
 void i915_gem_stolen_cleanup_compression(struct drm_device *dev);
 void i915_gem_cleanup_stolen(struct drm_device *dev);
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_device *dev, u32 size);
+void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d75c5c3..0349899 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3857,6 +3857,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	obj->pages_pin_count = 0;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
+	i915_gem_object_release_stolen(obj);
 
 	BUG_ON(obj->pages);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 68ef22a..86c4af4 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -204,3 +204,125 @@ int i915_gem_init_stolen(struct drm_device *dev)
 
 	return 0;
 }
+
+static struct sg_table *
+i915_pages_create_for_stolen(struct drm_device *dev,
+			     u32 offset, u32 size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct sg_table *st;
+	struct scatterlist *sg;
+
+	/* We hide that we have no struct page backing our stolen object
+	 * by wrapping the contiguous physical allocation with a fake
+	 * dma mapping in a single scatterlist.
+	 */
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return NULL;
+
+	if (!sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return NULL;
+	}
+
+	sg = st->sgl;
+	sg->offset = offset;
+	sg->length = size;
+
+	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
+	sg_dma_len(sg) = size;
+
+	return st;
+}
+
+static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+{
+	BUG();
+	return -EINVAL;
+}
+
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
+{
+	/* Should only be called during free */
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
+	.get_pages = i915_gem_object_get_pages_stolen,
+	.put_pages = i915_gem_object_put_pages_stolen,
+};
+
+static struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct drm_device *dev,
+			       struct drm_mm_node *stolen)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (obj == NULL)
+		return NULL;
+
+	if (drm_gem_private_object_init(dev, &obj->base, stolen->size))
+		goto cleanup;
+
+	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
+
+	obj->pages = i915_pages_create_for_stolen(dev,
+						  stolen->start, stolen->size);
+	if (obj->pages == NULL)
+		goto cleanup;
+
+	obj->has_dma_mapping = true;
+	obj->pages_pin_count = 1;
+	obj->stolen = stolen;
+
+	obj->base.write_domain = I915_GEM_DOMAIN_GTT;
+	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+	obj->cache_level = I915_CACHE_NONE;
+
+	return obj;
+
+cleanup:
+	kfree(obj);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+
+	if (dev_priv->mm.stolen_base == 0)
+		return 0;
+
+	DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
+	if (size == 0)
+		return NULL;
+
+	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
+	if (stolen)
+		stolen = drm_mm_get_block(stolen, size, 4096);
+	if (stolen == NULL)
+		return NULL;
+
+	obj = _i915_gem_object_create_stolen(dev, stolen);
+	if (obj)
+		return obj;
+
+	drm_mm_put_block(stolen);
+	return NULL;
+}
+
+void
+i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
+{
+	if (obj->stolen) {
+		drm_mm_put_block(obj->stolen);
+		obj->stolen = NULL;
+	}
+}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 14/18] drm/i915: Allocate fbcon from stolen memory
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (11 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 15/18] drm/i915: Allocate ringbuffers " Chris Wilson
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_fb.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 97f6735..9de9cd9 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -84,7 +84,9 @@ static int intelfb_create(struct intel_fbdev *ifbdev,
 
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = ALIGN(size, PAGE_SIZE);
-	obj = i915_gem_alloc_object(dev, size);
+	obj = i915_gem_object_create_stolen(dev, size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, size);
 	if (!obj) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = -ENOMEM;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 15/18] drm/i915: Allocate ringbuffers from stolen memory
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (12 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 14/18] drm/i915: Allocate fbcon from stolen memory Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-19 17:03 ` [PATCH 16/18] drm/i915: Allocate overlay registers " Chris Wilson
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c6f95a..13dbe5d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1100,7 +1100,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			return ret;
 	}
 
-	obj = i915_gem_alloc_object(dev, ring->size);
+	obj = NULL;
+	if (!HAS_LLC(dev))
+		obj = i915_gem_object_create_stolen(dev, ring->size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, ring->size);
 	if (obj == NULL) {
 		DRM_ERROR("Failed to allocate ringbuffer\n");
 		ret = -ENOMEM;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 16/18] drm/i915: Allocate overlay registers from stolen memory
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (13 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 15/18] drm/i915: Allocate ringbuffers " Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-11-05 17:39   ` Ben Widawsky
  2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
                   ` (2 subsequent siblings)
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_overlay.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index afd0f30..2fa20a4 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1368,8 +1368,10 @@ void intel_setup_overlay(struct drm_device *dev)
 
 	overlay->dev = dev;
 
-	reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
-	if (!reg_bo)
+	reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
+	if (reg_bo == NULL)
+		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
+	if (reg_bo == NULL)
 		goto out_free;
 	overlay->reg_bo = reg_bo;
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (14 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 16/18] drm/i915: Allocate overlay registers " Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-24 20:21   ` Paulo Zanoni
                     ` (2 more replies)
  2012-10-19 17:03 ` [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  2012-10-26 21:47 ` [PATCH 01/18] drm: Introduce drm_mm_create_block() Ben Widawsky
  17 siblings, 3 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

The primary purpose of this was to debug some use-after-free memory
corruption that was causing an OOPS inside drm/i915. As it turned out
the corruption was being caused elsewhere and i915.ko as a major user of
many objects was being hit hardest.

Indeed as we do frequent the generic kmalloc caches, dedicating one to
ourselves (or at least naming one for us depending upon the core) aids
debugging our own slab usage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_dma.c        |    3 +++
 drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
 drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
 drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
 5 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 14271aa..69969be 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
 
 	destroy_workqueue(dev_priv->wq);
 
+	if (dev_priv->slab)
+		kmem_cache_destroy(dev_priv->slab);
+
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev->dev_private);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5a0e0cd..5084b29 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -398,6 +398,7 @@ struct intel_gmbus {
 
 typedef struct drm_i915_private {
 	struct drm_device *dev;
+	struct kmem_cache *slab;
 
 	const struct intel_device_info *info;
 
@@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
+void *i915_gem_object_alloc(struct drm_device *dev);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
 int i915_gem_init_object(struct drm_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0349899..8183c0f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	kmem_cache_free(dev_priv->slab, obj);
+}
+
 static int
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
@@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
 	if (ret) {
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return ret;
 	}
 
@@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	struct address_space *mapping;
 	u32 mask;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
 	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return NULL;
 	}
 
@@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
 	kfree(obj->bit_17);
-	kfree(obj);
+	i915_gem_object_free(obj);
 }
 
 int
@@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-	int i;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	dev_priv->slab =
+		kmem_cache_create("i915_gem_object",
+				  sizeof(struct drm_i915_gem_object), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index ca3497e..f307e31 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	if (IS_ERR(attach))
 		return ERR_CAST(attach);
 
-
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL) {
 		ret = -ENOMEM;
 		goto fail_detach;
@@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 
 	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	if (ret) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		goto fail_detach;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 86c4af4..d7cfa71 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 {
 	struct drm_i915_gem_object *obj;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
@@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	return obj;
 
 cleanup:
-	kfree(obj);
+	i915_gem_object_free(obj);
 	return NULL;
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (15 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
@ 2012-10-19 17:03 ` Chris Wilson
  2012-10-22 22:21   ` Jesse Barnes
  2012-10-26 21:47 ` [PATCH 01/18] drm: Introduce drm_mm_create_block() Ben Widawsky
  17 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-19 17:03 UTC (permalink / raw)
  To: intel-gfx

By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL).

v2: Compile with CONFIG_MMU_NOTIFIER

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   21 +++
 drivers/gpu/drm/i915/i915_gem.c         |    9 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  277 +++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                  |   15 ++
 6 files changed, 321 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0f2c549..754d665 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  intel_display.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 69969be..b6e284a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1899,6 +1899,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_ROOT_ONLY|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5084b29..77c78d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -927,6 +928,7 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1072,6 +1074,23 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+
+	struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
 {
 	return obj->base.import_attach != NULL;
@@ -1333,6 +1352,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8183c0f..cda4dc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2565,9 +2565,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -3083,7 +3083,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset += obj->gtt_space->start;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
@@ -3876,6 +3876,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4263,7 +4266,7 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..645290a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct i915_gem_userptr_object *vmap;
+
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+	vmap->mm = NULL;
+
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	if (vmap->mm) {
+		mmu_notifier_unregister(&vmap->mn, vmap->mm);
+		BUG_ON(vmap->mm);
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	vmap->mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (vmap->mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (vmap->mm == current->mm)
+		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+					       !vmap->read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = vmap->mm;
+		ret = 0;
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		if (vmap->mm != NULL)
+			ret = get_user_pages(current, mm,
+					     vmap->user_ptr + (pinned << PAGE_SHIFT),
+					     num_pages - pinned,
+					     !vmap->read_only, 0,
+					     pvec + pinned,
+					     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+	i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG;
+
+	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+					   args->user_size);
+	if (ret)
+		return ret;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 0e6e135..4f41f8d 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHEING	0x2f
 #define DRM_I915_GEM_GET_CACHEING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -255,6 +256,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -476,6 +478,19 @@ struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2012-10-19 17:03 ` [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
@ 2012-10-22 22:21   ` Jesse Barnes
  2012-10-23  7:20     ` Daniel Vetter
  0 siblings, 1 reply; 51+ messages in thread
From: Jesse Barnes @ 2012-10-22 22:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:24 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> By exporting the ability to map user address and inserting PTEs
> representing their backing pages into the GTT, we can exploit UMA in order
> to utilize normal application data as a texture source or even as a
> render target (depending upon the capabilities of the chipset). This has
> a number of uses, with zero-copy downloads to the GPU and efficient
> readback making the intermixed streaming of CPU and GPU operations
> fairly efficient. This ability has many widespread implications from
> faster rendering of client-side software rasterisers (chromium),
> mitigation of stalls due to read back (firefox) and to faster pipelining
> of texture data (such as pixel buffer objects in GL).
> 
> v2: Compile with CONFIG_MMU_NOTIFIER

I want to understand the root-only nature of this better.

Is locking complexity the only reason we can't treat these pages like
normal BOs which can be swapped out from under us as long as they're
not pinned into the GTT?  Or are there other complications in managing
the refcount for these pages?

Reminds me: do we also check our bo allocations against the current
task's address space, data, file size, locked memory, file count,
and rss limits?  I dug into the shmem code at one point and seem to
remember that we didn't.  If not, it might be a good thing to add under
a new config option at some point.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2012-10-22 22:21   ` Jesse Barnes
@ 2012-10-23  7:20     ` Daniel Vetter
  2012-10-23 17:50       ` Eric Anholt
  0 siblings, 1 reply; 51+ messages in thread
From: Daniel Vetter @ 2012-10-23  7:20 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Tue, Oct 23, 2012 at 12:21 AM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Fri, 19 Oct 2012 18:03:24 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
>> By exporting the ability to map user address and inserting PTEs
>> representing their backing pages into the GTT, we can exploit UMA in order
>> to utilize normal application data as a texture source or even as a
>> render target (depending upon the capabilities of the chipset). This has
>> a number of uses, with zero-copy downloads to the GPU and efficient
>> readback making the intermixed streaming of CPU and GPU operations
>> fairly efficient. This ability has many widespread implications from
>> faster rendering of client-side software rasterisers (chromium),
>> mitigation of stalls due to read back (firefox) and to faster pipelining
>> of texture data (such as pixel buffer objects in GL).
>>
>> v2: Compile with CONFIG_MMU_NOTIFIER
>
> I want to understand the root-only nature of this better.
>
> Is locking complexity the only reason we can't treat these pages like
> normal BOs which can be swapped out from under us as long as they're
> not pinned into the GTT?  Or are there other complications in managing
> the refcount for these pages?

Essentially the complication in pinning tons of memory that we don't
control, and hence might upset other kernel mm parts (like page
migration). For native objects we can always fix this by intercepting
more of the vm callbacks for the shmem backing storage (i.e. gemfs).
But for foreing storage we need to use the mmu_notifiers, and atm
those expect that a pte shootdown is synchronous (which we can't do),
so there's some work left to get done imo.

> Reminds me: do we also check our bo allocations against the current
> task's address space, data, file size, locked memory, file count,
> and rss limits?  I dug into the shmem code at one point and seem to
> remember that we didn't.  If not, it might be a good thing to add under
> a new config option at some point.

Nope, we don't limit nor properly track userspace bo allocations. If
we run up against the OOM killer, it tends to kill everything _but_
the gem process hogging a few GB, since all those allocations are not
associated with a process (e.g. for execbuf or for gtt mmaps, cpu
shmem mmaps should account properly).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2012-10-23  7:20     ` Daniel Vetter
@ 2012-10-23 17:50       ` Eric Anholt
  0 siblings, 0 replies; 51+ messages in thread
From: Eric Anholt @ 2012-10-23 17:50 UTC (permalink / raw)
  To: Daniel Vetter, Jesse Barnes; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1844 bytes --]

Daniel Vetter <daniel@ffwll.ch> writes:

> On Tue, Oct 23, 2012 at 12:21 AM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
>> On Fri, 19 Oct 2012 18:03:24 +0100
>> Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>
>>> By exporting the ability to map user address and inserting PTEs
>>> representing their backing pages into the GTT, we can exploit UMA in order
>>> to utilize normal application data as a texture source or even as a
>>> render target (depending upon the capabilities of the chipset). This has
>>> a number of uses, with zero-copy downloads to the GPU and efficient
>>> readback making the intermixed streaming of CPU and GPU operations
>>> fairly efficient. This ability has many widespread implications from
>>> faster rendering of client-side software rasterisers (chromium),
>>> mitigation of stalls due to read back (firefox) and to faster pipelining
>>> of texture data (such as pixel buffer objects in GL).
>>>
>>> v2: Compile with CONFIG_MMU_NOTIFIER
>>
>> I want to understand the root-only nature of this better.
>>
>> Is locking complexity the only reason we can't treat these pages like
>> normal BOs which can be swapped out from under us as long as they're
>> not pinned into the GTT?  Or are there other complications in managing
>> the refcount for these pages?
>
> Essentially the complication in pinning tons of memory that we don't
> control, and hence might upset other kernel mm parts (like page
> migration). For native objects we can always fix this by intercepting
> more of the vm callbacks for the shmem backing storage (i.e. gemfs).
> But for foreing storage we need to use the mmu_notifiers, and atm
> those expect that a pte shootdown is synchronous (which we can't do),
> so there's some work left to get done imo.

So, the code isn't done, and the solution is to push it as root-only?
Seriously?

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
@ 2012-10-24 20:21   ` Paulo Zanoni
  2012-11-05 17:49   ` Ben Widawsky
  2012-11-05 18:07   ` Ben Widawsky
  2 siblings, 0 replies; 51+ messages in thread
From: Paulo Zanoni @ 2012-10-24 20:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi

2012/10/19 Chris Wilson <chris@chris-wilson.co.uk>:
> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
>
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

So I applied patches 1-17 in 2 machines (SNB and HSW). Patch 18 does
not apply anymore, it needs to be rebased, so I did not apply it. Both
machines boot and work fine without any unusual errors.

While running "dmesg | grep stolen" one machines tells me it found
32mb and it matches the "pre-allocated" option on my BIOS. The other
machine gives me 64mb but I couldn't find anything related to this on
the BIOS. In both cases the PCI message "detected XXXK stolen memory"
matches the amount print by i915_gem_init_stolen.

On the SNB machine which had 64mb of stolen memory I also see
"reserving 33554432 bytes of contiguous stolen space for FBC".

I'm not sure if this is enough for a tested-by tag, so feel free to
add if you think this test was enough. Please tell me if I need to
test something else.
Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

Thanks,
Paulo

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>
>         destroy_workqueue(dev_priv->wq);
>
> +       if (dev_priv->slab)
> +               kmem_cache_destroy(dev_priv->slab);
> +
>         pci_dev_put(dev_priv->bridge_dev);
>         kfree(dev->dev_private);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>
>  typedef struct drm_i915_private {
>         struct drm_device *dev;
> +       struct kmem_cache *slab;
>
>         const struct intel_device_info *info;
>
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>                         struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>                          const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>                                                   size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>                                      uint32_t alignment,
>                                      bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>         return 0;
>  }
>
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +       return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +       kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>                 struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>         if (ret) {
>                 drm_gem_object_release(&obj->base);
>                 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 return ret;
>         }
>
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>         struct address_space *mapping;
>         u32 mask;
>
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL)
>                 return NULL;
>
>         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 return NULL;
>         }
>
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>         i915_gem_info_remove_obj(dev_priv, obj->base.size);
>
>         kfree(obj->bit_17);
> -       kfree(obj);
> +       i915_gem_object_free(obj);
>  }
>
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -       int i;
>         drm_i915_private_t *dev_priv = dev->dev_private;
> +       int i;
> +
> +       dev_priv->slab =
> +               kmem_cache_create("i915_gem_object",
> +                                 sizeof(struct drm_i915_gem_object), 0,
> +                                 SLAB_HWCACHE_ALIGN,
> +                                 NULL);
>
>         INIT_LIST_HEAD(&dev_priv->mm.active_list);
>         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>         if (IS_ERR(attach))
>                 return ERR_CAST(attach);
>
> -
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL) {
>                 ret = -ENOMEM;
>                 goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>
>         ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>         if (ret) {
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 goto fail_detach;
>         }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>         struct drm_i915_gem_object *obj;
>
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL)
>                 return NULL;
>
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>         return obj;
>
>  cleanup:
> -       kfree(obj);
> +       i915_gem_object_free(obj);
>         return NULL;
>  }
>
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx



-- 
Paulo Zanoni

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 01/18] drm: Introduce drm_mm_create_block()
  2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (16 preceding siblings ...)
  2012-10-19 17:03 ` [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
@ 2012-10-26 21:47 ` Ben Widawsky
  2012-10-28  9:57   ` Chris Wilson
  17 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-10-26 21:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: dri-devel, Dave Airlie, intel-gfx

On Fri, 19 Oct 2012 18:03:07 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> To be used later by i915 to preallocate exact blocks of space from the
> range manager.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Airlie <airlied@redhat.com>
> Cc: dri-devel@lists.freedestkop.org

With bikesheds below addressed or not:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/drm_mm.c |   49
> ++++++++++++++++++++++++++++++++++++++++++++++
> include/drm/drm_mm.h     |    4 ++++ 2 files changed, 53 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 9bb82f7..5db8c20 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct
> drm_mm_node *hole_node, }
>  }
>  
> +struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> +					unsigned long start,
> +					unsigned long size,
> +					bool atomic)
> +{

<bikeshed>
You could add a best_fit field like some of the other interfaces which
will try to find a start == hole_start and end == hole_end. I'd guess
this interface won't be called enough to worry about fragmentation too
much though.
</bikeshed>

> +	struct drm_mm_node *hole, *node;
> +	unsigned long end = start + size;
> +
> +	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
> +		unsigned long hole_start;
> +		unsigned long hole_end;
> +
> +		BUG_ON(!hole->hole_follows);

<bikeshed>
This isn't bad, but I don't think sticking the bug here is all that
helpful in finding where the bug occured, since it wasn't here. WARN is
perhaps more useful, but equally unhelpful IMO.
</bikeshed>

> +		hole_start = drm_mm_hole_node_start(hole);
> +		hole_end = drm_mm_hole_node_end(hole);
> +
> +		if (hole_start > start || hole_end < end)
> +			continue;
> +
> +		node = drm_mm_kmalloc(mm, atomic);
> +		if (unlikely(node == NULL))
> +			return NULL;
> +
> +		node->start = start;
> +		node->size = size;
> +		node->mm = mm;
> +		node->allocated = 1;
> +
> +		INIT_LIST_HEAD(&node->hole_stack);
> +		list_add(&node->node_list, &hole->node_list);
> +
> +		if (start == hole_start) {
> +			hole->hole_follows = 0;
> +			list_del_init(&hole->hole_stack);
> +		}
> +
> +		node->hole_follows = 0;
> +		if (end != hole_end) {
> +			list_add(&node->hole_stack, &mm->hole_stack);
> +			node->hole_follows = 1;
> +		}
> +
> +		return node;
> +	}
> +
> +	return NULL;
> +}
> +EXPORT_SYMBOL(drm_mm_create_block);
> +
>  struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node
> *hole_node, unsigned long size,
>  					     unsigned alignment,
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 06d7f79..4020f96 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -102,6 +102,10 @@ static inline bool drm_mm_initialized(struct
> drm_mm *mm) /*
>   * Basic range manager support (drm_mm.c)
>   */
> +extern struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> +					       unsigned long start,
> +					       unsigned long size,
> +					       bool atomic);
>  extern struct drm_mm_node *drm_mm_get_block_generic(struct
> drm_mm_node *node, unsigned long size,
>  						    unsigned
> alignment,



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-19 17:03 ` [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
@ 2012-10-26 21:58   ` Ben Widawsky
  2012-10-28  9:48     ` Chris Wilson
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-10-26 21:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:09 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> A few of the earlier registers where enlarged and so the Base Data of
> Stolen Memory Register (BDSM) was pushed to 0xb0.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

This patch seems irrelevant to me. I have a i915_stolen_to_phys which
already looks correct (git blame shows you last updated it in April).

Can you help unconfuse me?

> ---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |   12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c
> b/drivers/gpu/drm/i915/i915_gem_stolen.c index a01ff74..d023ed6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -63,7 +63,14 @@ static unsigned long
> i915_stolen_to_physical(struct drm_device *dev)
>  	 * its value of TOLUD.
>  	 */
>  	base = 0;
> -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> +	if (INTEL_INFO(dev)->gen >= 6) {
> +		/* Read Base Data of Stolen Memory Register (BDSM)
> directly.
> +		 * Note that there is also a MCHBAR miror at
> 0x1080c0 or
> +		 * we could use device 2:0x5c instead.
> +		*/
> +		pci_read_config_dword(pdev, 0xB0, &base);
> +		base &= ~4095; /* lower bits used for locking
> register */
> +	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
>  		/* Read Graphics Base of Stolen Memory directly */
>  		pci_read_config_dword(pdev, 0xA4, &base);
>  #if 0
> @@ -172,6 +179,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  	if (dev_priv->mm.stolen_base == 0)
>  		return 0;
>  
> +	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
> +		      dev_priv->mm.gtt->stolen_size,
> dev_priv->mm.stolen_base); +
>  	/* Basic memrange allocator for stolen space */
>  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
>  



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT
  2012-10-19 17:03 ` [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
@ 2012-10-26 22:22   ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-10-26 22:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:10 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |    2 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.c |   35 ++++++++++++++++++++++++++++++++---
>  2 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 687f379..589b2f3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -908,6 +908,8 @@ enum i915_cache_level {
>  	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
>  };
>  
> +#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
> +
>  struct drm_i915_gem_object_ops {
>  	/* Interface between the GEM object and its backing storage.
>  	 * get_pages() is called once prior to the use of the associated set
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 47e427e..d9d3fc7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -378,18 +378,47 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
>  			      unsigned long end)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_mm_node *entry;
> +	struct drm_i915_gem_object *obj;
>  
> -	/* Substract the guard page ... */
> +	/* Subtract the guard page ... */
>  	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
>  	if (!HAS_LLC(dev))
>  		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
>  
> +	/* Mark any preallocated objects as occupied */
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
> +		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
> +			      obj->gtt_offset, obj->base.size);
> +
> +		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
> +		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
> +						     obj->gtt_offset,
> +						     obj->base.size,
> +						     false);
> +		obj->has_global_gtt_mapping = 1;
> +	}
> +

I think this requires more explanation. I believe on driver init the
bound_list should be empty. What am I missing? If this is for some
future thing where you're going to bind objects before we init the gtt,
a comment in the commit message would have been really terrific.

>  	dev_priv->mm.gtt_start = start;
>  	dev_priv->mm.gtt_mappable_end = mappable_end;
>  	dev_priv->mm.gtt_end = end;
>  	dev_priv->mm.gtt_total = end - start;
>  	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
>  
> -	/* ... but ensure that we clear the entire range. */
> -	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
> +	/* Clear any non-preallocated blocks */
> +	list_for_each_entry(entry, &dev_priv->mm.gtt_space.hole_stack, hole_stack) {
> +		unsigned long hole_start = entry->start + entry->size;
> +		unsigned long hole_end = list_entry(entry->node_list.next,
> +						    struct drm_mm_node,
> +						    node_list)->start;
> +
> +		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> +			      hole_start, hole_end);
> +
> +		intel_gtt_clear_range(hole_start / PAGE_SIZE, +
> (hole_end-hole_start) / PAGE_SIZE); +	} + +	/* And finally clear the
> reserved guard page */ +	intel_gtt_clear_range(end / PAGE_SIZE -
> 1, 1); }

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-26 21:58   ` Ben Widawsky
@ 2012-10-28  9:48     ` Chris Wilson
  2012-10-28 17:52       ` Ben Widawsky
  2012-11-02  0:08       ` Ben Widawsky
  0 siblings, 2 replies; 51+ messages in thread
From: Chris Wilson @ 2012-10-28  9:48 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Fri, 26 Oct 2012 14:58:45 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:09 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > A few of the earlier registers where enlarged and so the Base Data of
> > Stolen Memory Register (BDSM) was pushed to 0xb0.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> This patch seems irrelevant to me. I have a i915_stolen_to_phys which
> already looks correct (git blame shows you last updated it in April).
> 
> Can you help unconfuse me?

As the patch suggests the current registers being used by stolen-to-phys
are incorrect for SNB+.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 01/18] drm: Introduce drm_mm_create_block()
  2012-10-26 21:47 ` [PATCH 01/18] drm: Introduce drm_mm_create_block() Ben Widawsky
@ 2012-10-28  9:57   ` Chris Wilson
  2012-10-28 18:12     ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-10-28  9:57 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: dri-devel, Dave Airlie, intel-gfx

On Fri, 26 Oct 2012 14:47:43 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:07 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > To be used later by i915 to preallocate exact blocks of space from the
> > range manager.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Dave Airlie <airlied@redhat.com>
> > Cc: dri-devel@lists.freedestkop.org
> 
> With bikesheds below addressed or not:
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/drm_mm.c |   49
> > ++++++++++++++++++++++++++++++++++++++++++++++
> > include/drm/drm_mm.h     |    4 ++++ 2 files changed, 53 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> > index 9bb82f7..5db8c20 100644
> > --- a/drivers/gpu/drm/drm_mm.c
> > +++ b/drivers/gpu/drm/drm_mm.c
> > @@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct
> > drm_mm_node *hole_node, }
> >  }
> >  
> > +struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> > +					unsigned long start,
> > +					unsigned long size,
> > +					bool atomic)
> > +{
> 
> <bikeshed>
> You could add a best_fit field like some of the other interfaces which
> will try to find a start == hole_start and end == hole_end. I'd guess
> this interface won't be called enough to worry about fragmentation too
> much though.
> </bikeshed>

It's not a best fit though, it's an exact allocation request. The search
is to find the location in the free list where we need to insert the
node, and just as importantly reject the request if it would clobber an
earlier allocation.

> 
> > +	struct drm_mm_node *hole, *node;
> > +	unsigned long end = start + size;
> > +
> > +	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
> > +		unsigned long hole_start;
> > +		unsigned long hole_end;
> > +
> > +		BUG_ON(!hole->hole_follows);
> 
> <bikeshed>
> This isn't bad, but I don't think sticking the bug here is all that
> helpful in finding where the bug occured, since it wasn't here. WARN is
> perhaps more useful, but equally unhelpful IMO.
> </bikeshed>

The BUG_ON() is to be consistent with the rest of the code, and so there
isn't a conflict of interests when replacing all the common chunks with
drm_mm_for_each_hole().
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-28  9:48     ` Chris Wilson
@ 2012-10-28 17:52       ` Ben Widawsky
  2012-11-02  0:08       ` Ben Widawsky
  1 sibling, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-10-28 17:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sun, 28 Oct 2012 09:48:35 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Fri, 26 Oct 2012 14:58:45 -0700, Ben Widawsky <ben@bwidawsk.net>
> wrote:
> > On Fri, 19 Oct 2012 18:03:09 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > A few of the earlier registers where enlarged and so the Base
> > > Data of Stolen Memory Register (BDSM) was pushed to 0xb0.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > This patch seems irrelevant to me. I have a i915_stolen_to_phys
> > which already looks correct (git blame shows you last updated it in
> > April).
> > 
> > Can you help unconfuse me?
> 
> As the patch suggests the current registers being used by
> stolen-to-phys are incorrect for SNB+.
> -Chris
> 

Well no thanks to you, I found my confusion. Since I skipped patch 2 as
I don't care about gen2, this patch made no sense to me. It looks like
I need to go back and review patch 2 since among fixing detection for
gen2 you did a few other things such as changing the name
from i915_stolen_to_physical to i915_stolen_to_phys.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 01/18] drm: Introduce drm_mm_create_block()
  2012-10-28  9:57   ` Chris Wilson
@ 2012-10-28 18:12     ` Ben Widawsky
  2012-10-28 18:14       ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-10-28 18:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: dri-devel, Dave Airlie, intel-gfx

On Sun, 28 Oct 2012 09:57:21 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Fri, 26 Oct 2012 14:47:43 -0700, Ben Widawsky <ben@bwidawsk.net>
> wrote:
> > On Fri, 19 Oct 2012 18:03:07 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > To be used later by i915 to preallocate exact blocks of space
> > > from the range manager.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Dave Airlie <airlied@redhat.com>
> > > Cc: dri-devel@lists.freedestkop.org
> > 
> > With bikesheds below addressed or not:
> > Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > >  drivers/gpu/drm/drm_mm.c |   49
> > > ++++++++++++++++++++++++++++++++++++++++++++++
> > > include/drm/drm_mm.h     |    4 ++++ 2 files changed, 53
> > > insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> > > index 9bb82f7..5db8c20 100644
> > > --- a/drivers/gpu/drm/drm_mm.c
> > > +++ b/drivers/gpu/drm/drm_mm.c
> > > @@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct
> > > drm_mm_node *hole_node, }
> > >  }
> > >  
> > > +struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> > > +					unsigned long start,
> > > +					unsigned long size,
> > > +					bool atomic)
> > > +{
> > 
> > <bikeshed>
> > You could add a best_fit field like some of the other interfaces
> > which will try to find a start == hole_start and end == hole_end.
> > I'd guess this interface won't be called enough to worry about
> > fragmentation too much though.
> > </bikeshed>
> 
> It's not a best fit though, it's an exact allocation request. The
> search is to find the location in the free list where we need to
> insert the node, and just as importantly reject the request if it
> would clobber an earlier allocation.

Yeah, my comment seems a bit silly now reading it again. I was
forgetting that start is a very specific place (as opposed to the
search_free case).

But, this does remind me since the hole_stack is ordered, instead of:
> +	if (hole_start > start || hole_end < end)
> +		continue;

Can't we do:
+	if (hole_start > start || hole_end < end)
+		break;

> 
> > 
> > > +	struct drm_mm_node *hole, *node;
> > > +	unsigned long end = start + size;
> > > +
> > > +	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
> > > +		unsigned long hole_start;
> > > +		unsigned long hole_end;
> > > +
> > > +		BUG_ON(!hole->hole_follows);
> > 
> > <bikeshed>
> > This isn't bad, but I don't think sticking the bug here is all that
> > helpful in finding where the bug occured, since it wasn't here.
> > WARN is perhaps more useful, but equally unhelpful IMO.
> > </bikeshed>
> 
> The BUG_ON() is to be consistent with the rest of the code, and so
> there isn't a conflict of interests when replacing all the common
> chunks with drm_mm_for_each_hole().
> -Chris
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 01/18] drm: Introduce drm_mm_create_block()
  2012-10-28 18:12     ` Ben Widawsky
@ 2012-10-28 18:14       ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-10-28 18:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

By the way, you noticed you got the dri-devel address wrong?

On Sun, 28 Oct 2012 11:12:05 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Sun, 28 Oct 2012 09:57:21 +0000
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > On Fri, 26 Oct 2012 14:47:43 -0700, Ben Widawsky <ben@bwidawsk.net>
> > wrote:
> > > On Fri, 19 Oct 2012 18:03:07 +0100
> > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > 
> > > > To be used later by i915 to preallocate exact blocks of space
> > > > from the range manager.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Dave Airlie <airlied@redhat.com>
> > > > Cc: dri-devel@lists.freedestkop.org
> > > 
> > > With bikesheds below addressed or not:
> > > Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> > > > ---
> > > >  drivers/gpu/drm/drm_mm.c |   49
> > > > ++++++++++++++++++++++++++++++++++++++++++++++
> > > > include/drm/drm_mm.h     |    4 ++++ 2 files changed, 53
> > > > insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> > > > index 9bb82f7..5db8c20 100644
> > > > --- a/drivers/gpu/drm/drm_mm.c
> > > > +++ b/drivers/gpu/drm/drm_mm.c
> > > > @@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct
> > > > drm_mm_node *hole_node, }
> > > >  }
> > > >  
> > > > +struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> > > > +					unsigned long start,
> > > > +					unsigned long size,
> > > > +					bool atomic)
> > > > +{
> > > 
> > > <bikeshed>
> > > You could add a best_fit field like some of the other interfaces
> > > which will try to find a start == hole_start and end == hole_end.
> > > I'd guess this interface won't be called enough to worry about
> > > fragmentation too much though.
> > > </bikeshed>
> > 
> > It's not a best fit though, it's an exact allocation request. The
> > search is to find the location in the free list where we need to
> > insert the node, and just as importantly reject the request if it
> > would clobber an earlier allocation.
> 
> Yeah, my comment seems a bit silly now reading it again. I was
> forgetting that start is a very specific place (as opposed to the
> search_free case).
> 
> But, this does remind me since the hole_stack is ordered, instead of:
> > +	if (hole_start > start || hole_end < end)
> > +		continue;
> 
> Can't we do:
> +	if (hole_start > start || hole_end < end)
> +		break;
> 
> > 
> > > 
> > > > +	struct drm_mm_node *hole, *node;
> > > > +	unsigned long end = start + size;
> > > > +
> > > > +	list_for_each_entry(hole, &mm->hole_stack, hole_stack)
> > > > {
> > > > +		unsigned long hole_start;
> > > > +		unsigned long hole_end;
> > > > +
> > > > +		BUG_ON(!hole->hole_follows);
> > > 
> > > <bikeshed>
> > > This isn't bad, but I don't think sticking the bug here is all
> > > that helpful in finding where the bug occured, since it wasn't
> > > here. WARN is perhaps more useful, but equally unhelpful IMO.
> > > </bikeshed>
> > 
> > The BUG_ON() is to be consistent with the rest of the code, and so
> > there isn't a conflict of interests when replacing all the common
> > chunks with drm_mm_for_each_hole().
> > -Chris
> > 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2
  2012-10-19 17:03 ` [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
@ 2012-11-01 23:51   ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-01 23:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:08 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> It was not until the G33 refresh, that a PCI config register was
> introduced that explicitly said where the stolen memory was. Prior to
> 865G there was not even a register that said where the end of usable
> low memory was and where the stolen memory began (or ended depending
> upon chipset). Before then, one has to look at the BIOS memory maps to
> find the Top of Memory. Alas that is not exported by arch/x86 and so we
> have to resort to disabling stolen memory on gen2 for the time being.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    1 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c |   69 ++++++++++++++------------------
>  2 files changed, 31 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4728d30..687f379 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -705,6 +705,7 @@ typedef struct drm_i915_private {
>  		unsigned long gtt_start;
>  		unsigned long gtt_mappable_end;
>  		unsigned long gtt_end;
> +		unsigned long stolen_base; /* limited to low memory (32-bit) */
>  
>  		struct io_mapping *gtt_mapping;
>  		phys_addr_t gtt_base_addr;
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index ada2e90..a01ff74 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -43,56 +43,43 @@
>   * for is a boon.
>   */
>  
> -#define PTE_ADDRESS_MASK		0xfffff000
> -#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
> -#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
> -#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
> -#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
> -#define PTE_MAPPING_TYPE_MASK		(3 << 1)
> -#define PTE_VALID			(1 << 0)
> -
> -/**
> - * i915_stolen_to_phys - take an offset into stolen memory and turn it into
> - *                       a physical one
> - * @dev: drm device
> - * @offset: address to translate
> - *
> - * Some chip functions require allocations from stolen space and need the
> - * physical address of the memory in question.
> - */
> -static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
> +static unsigned long i915_stolen_to_physical(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct pci_dev *pdev = dev_priv->bridge_dev;
>  	u32 base;
>  
> -#if 0
>  	/* On the machines I have tested the Graphics Base of Stolen Memory
> -	 * is unreliable, so compute the base by subtracting the stolen memory
> -	 * from the Top of Low Usable DRAM which is where the BIOS places
> -	 * the graphics stolen memory.
> +	 * is unreliable, so on those compute the base by subtracting the
> +	 * stolen memory from the Top of Low Usable DRAM which is where the
> +	 * BIOS places the graphics stolen memory.
> +	 *
> +	 * On gen2, the layout is slightly different with the Graphics Segment
> +	 * immediately following Top of Memory (or Top of Usable DRAM). Note
> +	 * it appears that TOUD is only reported by 865g, so we just use the
> +	 * top of memory as determined by the e820 probe.
> +	 *
> +	 * XXX gen2 requires an unavailable symbol and 945gm fails with
> +	 * its value of TOLUD.
>  	 */
> +	base = 0;
>  	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> -		/* top 32bits are reserved = 0 */
> +		/* Read Graphics Base of Stolen Memory directly */
>  		pci_read_config_dword(pdev, 0xA4, &base);
> -	} else {
> -		/* XXX presume 8xx is the same as i915 */
> -		pci_bus_read_config_dword(pdev->bus, 2, 0x5C, &base);
> -	}
> -#else
> -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> -		u16 val;
> -		pci_read_config_word(pdev, 0xb0, &val);
> -		base = val >> 4 << 20;
> -	} else {
> +#if 0
> +	} else if (IS_GEN3(dev)) {
>  		u8 val;
> +		/* Stolen is immediately below Top of Low Usable DRAM */
>  		pci_read_config_byte(pdev, 0x9c, &val);
>  		base = val >> 3 << 27;
> -	}
> -	base -= dev_priv->mm.gtt->stolen_size;
> +		base -= dev_priv->mm.gtt->stolen_size;
> +	} else {
> +		/* Stolen is immediately above Top of Memory */
> +		base = max_low_pfn_mapped << PAGE_SHIFT;
>  #endif
> +	}
>  
> -	return base + offset;
> +	return base;
>  }
>  

Comments on this below.

>  static void i915_warn_stolen(struct drm_device *dev)
> @@ -117,7 +104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  	if (!compressed_fb)
>  		goto err;
>  
> -	cfb_base = i915_stolen_to_phys(dev, compressed_fb->start);
> +	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
>  	if (!cfb_base)
>  		goto err_fb;
>  
> @@ -130,7 +117,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  		if (!compressed_llb)
>  			goto err_fb;
>  
> -		ll_base = i915_stolen_to_phys(dev, compressed_llb->start);
> +		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
>  		if (!ll_base)
>  			goto err_llb;
>  	}
> @@ -149,7 +136,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  	}
>  
>  	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
> -		      cfb_base, ll_base, size >> 20);
> +		      (long)cfb_base, (long)ll_base, size >> 20);
>  	return;
>  
>  err_llb:
> @@ -181,6 +168,10 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
>  
> +	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
> +	if (dev_priv->mm.stolen_base == 0)
> +		return 0;
> +
>  	/* Basic memrange allocator for stolen space */
>  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
>  

Since I found viewing the diff difficult for this patch, I am going to
write the before and after (with the #if 0 blocks removed):

Before:
        if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
                u16 val;
                pci_read_config_word(pdev, 0xb0, &val);
                base = val >> 4 << 20;
        } else {
                u8 val;
                pci_read_config_byte(pdev, 0x9c, &val);
                base = val >> 3 << 27;
        }
        base -= dev_priv->mm.gtt->stolen_size;

After:
        base = 0;
        if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
                /* Read Graphics Base of Stolen Memory directly */
                pci_read_config_dword(pdev, 0xA4, &base);


I like that this is a simple helper now to calculate GSM Base. However,
I am completely baffled by this patch. The read of 0xA4 did exist
before, but was #if 0'd out appears to be the PCI capabilities pointer
as of GEN6 (I won't check before that). The before code on the other
hand at least looks correct for gen6 (BDSM - GGMS).

This seems like it will definitely break SNB+, which would be bad for
bisection.

Has git done something funny here?

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-28  9:48     ` Chris Wilson
  2012-10-28 17:52       ` Ben Widawsky
@ 2012-11-02  0:08       ` Ben Widawsky
  2012-11-02  8:54         ` Chris Wilson
  1 sibling, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-02  0:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sun, 28 Oct 2012 09:48:35 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Fri, 26 Oct 2012 14:58:45 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:09 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > A few of the earlier registers where enlarged and so the Base Data of
> > > Stolen Memory Register (BDSM) was pushed to 0xb0.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > This patch seems irrelevant to me. I have a i915_stolen_to_phys which
> > already looks correct (git blame shows you last updated it in April).
> > 
> > Can you help unconfuse me?
> 
> As the patch suggests the current registers being used by stolen-to-phys
> are incorrect for SNB+.
> -Chris
> 

It looks like all you've done here is fix up the bug you left from
patch2.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-11-02  0:08       ` Ben Widawsky
@ 2012-11-02  8:54         ` Chris Wilson
  2012-11-05 13:53           ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-11-02  8:54 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Thu, 1 Nov 2012 17:08:20 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Sun, 28 Oct 2012 09:48:35 +0000
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > On Fri, 26 Oct 2012 14:58:45 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > On Fri, 19 Oct 2012 18:03:09 +0100
> > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > 
> > > > A few of the earlier registers where enlarged and so the Base Data of
> > > > Stolen Memory Register (BDSM) was pushed to 0xb0.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > 
> > > This patch seems irrelevant to me. I have a i915_stolen_to_phys which
> > > already looks correct (git blame shows you last updated it in April).
> > > 
> > > Can you help unconfuse me?
> > 
> > As the patch suggests the current registers being used by stolen-to-phys
> > are incorrect for SNB+.
> > -Chris
> > 
> 
> It looks like all you've done here is fix up the bug you left from
> patch2.
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
From: Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2
To: Ben Widawsky <ben@bwidawsk.net>
Cc: intel-gfx@lists.freedesktop.org
In-Reply-To: <20121101165102.3cc9773c@bwidawsk.net>
References: <1350666204-8101-1-git-send-email-chris@chris-wilson.co.uk> <1350666204-8101-2-git-send-email-chris@chris-wilson.co.uk> <20121101165102.3cc9773c@bwidawsk.net>

On Thu, 1 Nov 2012 16:51:02 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:08 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > It was not until the G33 refresh, that a PCI config register was
> > introduced that explicitly said where the stolen memory was. Prior to
> > 865G there was not even a register that said where the end of usable
> > low memory was and where the stolen memory began (or ended depending
> > upon chipset). Before then, one has to look at the BIOS memory maps to
> > find the Top of Memory. Alas that is not exported by arch/x86 and so we
> > have to resort to disabling stolen memory on gen2 for the time being.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h        |    1 +
> >  drivers/gpu/drm/i915/i915_gem_stolen.c |   69 ++++++++++++++------------------
> >  2 files changed, 31 insertions(+), 39 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 4728d30..687f379 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -705,6 +705,7 @@ typedef struct drm_i915_private {
> >  		unsigned long gtt_start;
> >  		unsigned long gtt_mappable_end;
> >  		unsigned long gtt_end;
> > +		unsigned long stolen_base; /* limited to low memory (32-bit) */
> >  
> >  		struct io_mapping *gtt_mapping;
> >  		phys_addr_t gtt_base_addr;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > index ada2e90..a01ff74 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > @@ -43,56 +43,43 @@
> >   * for is a boon.
> >   */
> >  
> > -#define PTE_ADDRESS_MASK		0xfffff000
> > -#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
> > -#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
> > -#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
> > -#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
> > -#define PTE_MAPPING_TYPE_MASK		(3 << 1)
> > -#define PTE_VALID			(1 << 0)
> > -
> > -/**
> > - * i915_stolen_to_phys - take an offset into stolen memory and turn it into
> > - *                       a physical one
> > - * @dev: drm device
> > - * @offset: address to translate
> > - *
> > - * Some chip functions require allocations from stolen space and need the
> > - * physical address of the memory in question.
> > - */
> > -static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
> > +static unsigned long i915_stolen_to_physical(struct drm_device *dev)
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct pci_dev *pdev = dev_priv->bridge_dev;
> >  	u32 base;
> >  
> > -#if 0
> >  	/* On the machines I have tested the Graphics Base of Stolen Memory
> > -	 * is unreliable, so compute the base by subtracting the stolen memory
> > -	 * from the Top of Low Usable DRAM which is where the BIOS places
> > -	 * the graphics stolen memory.
> > +	 * is unreliable, so on those compute the base by subtracting the
> > +	 * stolen memory from the Top of Low Usable DRAM which is where the
> > +	 * BIOS places the graphics stolen memory.
> > +	 *
> > +	 * On gen2, the layout is slightly different with the Graphics Segment
> > +	 * immediately following Top of Memory (or Top of Usable DRAM). Note
> > +	 * it appears that TOUD is only reported by 865g, so we just use the
> > +	 * top of memory as determined by the e820 probe.
> > +	 *
> > +	 * XXX gen2 requires an unavailable symbol and 945gm fails with
> > +	 * its value of TOLUD.
> >  	 */
> > +	base = 0;
> >  	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> > -		/* top 32bits are reserved = 0 */
> > +		/* Read Graphics Base of Stolen Memory directly */
> >  		pci_read_config_dword(pdev, 0xA4, &base);
> > -	} else {
> > -		/* XXX presume 8xx is the same as i915 */
> > -		pci_bus_read_config_dword(pdev->bus, 2, 0x5C, &base);
> > -	}
> > -#else
> > -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> > -		u16 val;
> > -		pci_read_config_word(pdev, 0xb0, &val);
> > -		base = val >> 4 << 20;
> > -	} else {
> > +#if 0
> > +	} else if (IS_GEN3(dev)) {
> >  		u8 val;
> > +		/* Stolen is immediately below Top of Low Usable DRAM */
> >  		pci_read_config_byte(pdev, 0x9c, &val);
> >  		base = val >> 3 << 27;
> > -	}
> > -	base -= dev_priv->mm.gtt->stolen_size;
> > +		base -= dev_priv->mm.gtt->stolen_size;
> > +	} else {
> > +		/* Stolen is immediately above Top of Memory */
> > +		base = max_low_pfn_mapped << PAGE_SHIFT;
> >  #endif
> > +	}
> >  
> > -	return base + offset;
> > +	return base;
> >  }
> >  
> 
> Comments on this below.
> 
> >  static void i915_warn_stolen(struct drm_device *dev)
> > @@ -117,7 +104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> >  	if (!compressed_fb)
> >  		goto err;
> >  
> > -	cfb_base = i915_stolen_to_phys(dev, compressed_fb->start);
> > +	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
> >  	if (!cfb_base)
> >  		goto err_fb;
> >  
> > @@ -130,7 +117,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> >  		if (!compressed_llb)
> >  			goto err_fb;
> >  
> > -		ll_base = i915_stolen_to_phys(dev, compressed_llb->start);
> > +		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
> >  		if (!ll_base)
> >  			goto err_llb;
> >  	}
> > @@ -149,7 +136,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> >  	}
> >  
> >  	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
> > -		      cfb_base, ll_base, size >> 20);
> > +		      (long)cfb_base, (long)ll_base, size >> 20);
> >  	return;
> >  
> >  err_llb:
> > @@ -181,6 +168,10 @@ int i915_gem_init_stolen(struct drm_device *dev)
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
> >  
> > +	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
> > +	if (dev_priv->mm.stolen_base == 0)
> > +		return 0;
> > +
> >  	/* Basic memrange allocator for stolen space */
> >  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
> >  
> 
> Since I found viewing the diff difficult for this patch, I am going to
> write the before and after (with the #if 0 blocks removed):
> 
> Before:
>         if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
>                 u16 val;
>                 pci_read_config_word(pdev, 0xb0, &val);
>                 base = val >> 4 << 20;
>         } else {
>                 u8 val;
>                 pci_read_config_byte(pdev, 0x9c, &val);
>                 base = val >> 3 << 27;
>         }
>         base -= dev_priv->mm.gtt->stolen_size;
> 
> After:
>         base = 0;
>         if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
>                 /* Read Graphics Base of Stolen Memory directly */
>                 pci_read_config_dword(pdev, 0xA4, &base);
> 
> 
> I like that this is a simple helper now to calculate GSM Base. However,
> I am completely baffled by this patch. The read of 0xA4 did exist
> before, but was #if 0'd out appears to be the PCI capabilities pointer
> as of GEN6 (I won't check before that). The before code on the other
> hand at least looks correct for gen6 (BDSM - GGMS).
> 
> This seems like it will definitely break SNB+, which would be bad for
> bisection.

The code as it stands is broken on SNB (it only has a semblance of
working but the address it generates is random and points into active
system memory), so it seems immaterial as to whether it then remained
broken after this patch, with the benefit of then having individual
patches to address the later generations. To allay your fears, this can
be split into a preparatory patch to allow the routine to fail, and then
fail until each is fixed.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager
  2012-10-19 17:03 ` [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
@ 2012-11-05 13:41   ` Ben Widawsky
  2012-11-05 15:13     ` Chris Wilson
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 13:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Dave Airlie, intel-gfx

On Fri, 19 Oct 2012 18:03:11 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> This will be used i915 in forthcoming patches in order to measure the
> largest contiguous chunk of memory available for enabling chipset
> features.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Airlie <airlied@redhat.com>

nitpicks below
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>


> ---
>  drivers/gpu/drm/drm_mm.c |   55 +++++++++++++++-------------------------------
>  include/drm/drm_mm.h     |   26 ++++++++++++++++++++++
>  2 files changed, 44 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 5db8c20..c3d11ec 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -102,20 +102,6 @@ int drm_mm_pre_get(struct drm_mm *mm)
>  }
>  EXPORT_SYMBOL(drm_mm_pre_get);
>  
> -static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
> -{
> -	return hole_node->start + hole_node->size;
> -}
> -
> -static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
> -{
> -	struct drm_mm_node *next_node =
> -		list_entry(hole_node->node_list.next, struct drm_mm_node,
> -			   node_list);
> -
> -	return next_node->start;
> -}
> -
>  static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>  				 struct drm_mm_node *node,
>  				 unsigned long size, unsigned alignment,
> @@ -127,7 +113,7 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>  	unsigned long adj_start = hole_start;
>  	unsigned long adj_end = hole_end;
>  
> -	BUG_ON(!hole_node->hole_follows || node->allocated);
> +	BUG_ON(node->allocated);
>  
>  	if (mm->color_adjust)
>  		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
> @@ -168,15 +154,10 @@ struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
>  {
>  	struct drm_mm_node *hole, *node;
>  	unsigned long end = start + size;
> +	unsigned long hole_start;
> +	unsigned long hole_end;
>  
> -	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
> -		unsigned long hole_start;
> -		unsigned long hole_end;
> -
> -		BUG_ON(!hole->hole_follows);
> -		hole_start = drm_mm_hole_node_start(hole);
> -		hole_end = drm_mm_hole_node_end(hole);
> -
> +	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
>  		if (hole_start > start || hole_end < end)
>  			continue;
>  
> @@ -361,8 +342,10 @@ void drm_mm_remove_node(struct drm_mm_node *node)
>  				== drm_mm_hole_node_end(node));
>  		list_del(&node->hole_stack);
>  	} else
> -		BUG_ON(drm_mm_hole_node_start(node)
> -				!= drm_mm_hole_node_end(node));
> +		BUG_ON(node->start + node->size !=
> +		       list_entry(node->node_list.next,
> +				  struct drm_mm_node, node_list)->start);
> +
> 

This seems harder to read to me than before. I suspect you've changed it
only because you've moved the inline functions down, but I also don't
understand the reason for that.
 
>  	if (!prev_node->hole_follows) {
>  		prev_node->hole_follows = 1;
> @@ -420,6 +403,8 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
>  {
>  	struct drm_mm_node *entry;
>  	struct drm_mm_node *best;
> +	unsigned long adj_start;
> +	unsigned long adj_end;
>  	unsigned long best_size;
>  
>  	BUG_ON(mm->scanned_blocks);
> @@ -427,17 +412,13 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
>  	best = NULL;
>  	best_size = ~0UL;
>  
> -	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
> -		unsigned long adj_start = drm_mm_hole_node_start(entry);
> -		unsigned long adj_end = drm_mm_hole_node_end(entry);
> -
> +	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
>  		if (mm->color_adjust) {
>  			mm->color_adjust(entry, color, &adj_start, &adj_end);
>  			if (adj_end <= adj_start)
>  				continue;
>  		}
>  
> -		BUG_ON(!entry->hole_follows);
>  		if (!check_free_hole(adj_start, adj_end, size, alignment))
>  			continue;
>  
> @@ -464,6 +445,8 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
>  {
>  	struct drm_mm_node *entry;
>  	struct drm_mm_node *best;
> +	unsigned long adj_start;
> +	unsigned long adj_end;
>  	unsigned long best_size;
>  
>  	BUG_ON(mm->scanned_blocks);
> @@ -471,13 +454,11 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
>  	best = NULL;
>  	best_size = ~0UL;
>  
> -	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
> -		unsigned long adj_start = drm_mm_hole_node_start(entry) < start ?
> -			start : drm_mm_hole_node_start(entry);
> -		unsigned long adj_end = drm_mm_hole_node_end(entry) > end ?
> -			end : drm_mm_hole_node_end(entry);
> -
> -		BUG_ON(!entry->hole_follows);
> +	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
> +		if (adj_start < start)
> +			adj_start = start;
> +		if (adj_end > end)
> +			adj_end = end;
>  
>  		if (mm->color_adjust) {
>  			mm->color_adjust(entry, color, &adj_start, &adj_end);
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 4020f96..d710a10 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -89,6 +89,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
>  {
>  	return mm->hole_stack.next;
>  }
> +
> +static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
> +{
> +	BUG_ON(!hole_node->hole_follows);
> +	return hole_node->start + hole_node->size;
> +}
> +
> +static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
> +{
> +	return list_entry(hole_node->node_list.next,
> +			  struct drm_mm_node, node_list)->start;
> +}
> +
>  #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \
>  						&(mm)->head_node.node_list, \
>  						node_list)
> @@ -99,6 +112,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
>  	     entry != NULL; entry = next, \
>  		next = entry ? list_entry(entry->node_list.next, \
>  			struct drm_mm_node, node_list) : NULL) \
> +
> +/* Note that we need to unroll list_for_each_entry in order to inline
> + * setting hole_start and hole_end on each iteration and keep the
> + * macro sane.
> + */

This comment is probably better suited for the commit message then here
where it'd require git blame to make any sense of it.


> +#define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
> +	for (entry = list_entry((mm)->hole_stack.next, typeof(struct drm_mm_node), hole_stack); \
> +	     &entry->hole_stack != &(mm)->hole_stack ? \
> +	     hole_start = drm_mm_hole_node_start(entry), \
> +	     hole_end = drm_mm_hole_node_end(entry) : \
> +	     0; \
> +	     entry = list_entry(entry->hole_stack.next, typeof(struct drm_mm_node), hole_stack))
> +
>  /*
>   * Basic range manager support (drm_mm.c)
>   */



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC
  2012-10-19 17:03 ` [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
@ 2012-11-05 13:44   ` Ben Widawsky
  2012-11-05 15:24     ` Chris Wilson
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 13:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:12 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> As we may wish to wrap regions preallocated by the BIOS, we need to do
> that before carving out contiguous chunks of stolen space for FBC.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

More nitckicks:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    2 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c |  115 ++++++++++++++++----------------
>  drivers/gpu/drm/i915/intel_display.c   |    3 +
>  drivers/gpu/drm/i915/intel_pm.c        |    3 +-
>  4 files changed, 66 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 589b2f3..997abb3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1516,6 +1516,8 @@ int i915_gem_evict_everything(struct drm_device *dev);
>  
>  /* i915_gem_stolen.c */
>  int i915_gem_init_stolen(struct drm_device *dev);
> +int i915_gem_stolen_setup_compression(struct drm_device *dev);
> +void i915_gem_stolen_cleanup_compression(struct drm_device *dev);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
>  
>  /* i915_gem_tiling.c */
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index d023ed6..68ef22a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -89,21 +89,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
>  	return base;
>  }
>  
> -static void i915_warn_stolen(struct drm_device *dev)
> -{
> -	DRM_INFO("not enough stolen space for compressed buffer, disabling\n");
> -	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
> -}
> -
> -static void i915_setup_compression(struct drm_device *dev, int size)
> +static int i915_setup_compression(struct drm_device *dev, int size)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
> -	unsigned long cfb_base;
> -	unsigned long ll_base = 0;
>  
> -	/* Just in case the BIOS is doing something questionable. */
> -	intel_disable_fbc(dev);
> +	DRM_DEBUG_KMS("reserving %d bytes of contiguous stolen space for FBC\n",
> +		      size);
>  

I see you've moved this to modeset init. I question whether we even need
it at all.


>  	compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
>  	if (compressed_fb)
> @@ -111,11 +103,11 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  	if (!compressed_fb)
>  		goto err;
>  
> -	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
> -	if (!cfb_base)
> -		goto err_fb;
> -
> -	if (!(IS_GM45(dev) || HAS_PCH_SPLIT(dev))) {
> +	if (HAS_PCH_SPLIT(dev))
> +		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
> +	else if (IS_GM45(dev)) {
> +		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
> +	} else {
>  		compressed_llb = drm_mm_search_free(&dev_priv->mm.stolen,
>  						    4096, 4096, 0);
>  		if (compressed_llb)
> @@ -124,56 +116,81 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  		if (!compressed_llb)
>  			goto err_fb;
>  
> -		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
> -		if (!ll_base)
> -			goto err_llb;
> -	}
> +		dev_priv->compressed_llb = compressed_llb;
>  
> -	dev_priv->cfb_size = size;
> +		I915_WRITE(FBC_CFB_BASE,
> +			   dev_priv->mm.stolen_base + compressed_fb->start);
> +		I915_WRITE(FBC_LL_BASE,
> +			   dev_priv->mm.stolen_base + compressed_llb->start);
> +	}
>  
>  	dev_priv->compressed_fb = compressed_fb;
> -	if (HAS_PCH_SPLIT(dev))
> -		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
> -	else if (IS_GM45(dev)) {
> -		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
> -	} else {
> -		I915_WRITE(FBC_CFB_BASE, cfb_base);
> -		I915_WRITE(FBC_LL_BASE, ll_base);
> -		dev_priv->compressed_llb = compressed_llb;
> -	}
> +	dev_priv->cfb_size = size;
>  
> -	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
> -		      (long)cfb_base, (long)ll_base, size >> 20);
> -	return;
> +	return size;
>  
> -err_llb:
> -	drm_mm_put_block(compressed_llb);
>  err_fb:
>  	drm_mm_put_block(compressed_fb);
>  err:
>  	dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
> -	i915_warn_stolen(dev);
> +	DRM_INFO("not enough stolen space for compressed buffer (need %d bytes), disabling\n", size);
> +	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
> +	return 0;
> +}
> +
> +int i915_gem_stolen_setup_compression(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node *node;
> +	unsigned long hole_start, hole_end, size;
> +
> +	if (dev_priv->mm.stolen_base == 0)
> +		return 0;
> +
> +	if (dev_priv->cfb_size)
> +		return dev_priv->cfb_size;
> +
> +	/* Try to set up FBC with a reasonable compressed buffer size */
> +	size = 0;
> +	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
> +		unsigned long hole_size = hole_end - hole_start;
> +		if (hole_size > size)
> +			size = hole_size;
> +	}
> +
> +	/* Try to get a 32M buffer... */
> +	if (size > (36*1024*1024))
> +		size = 32*1024*1024;
> +	else /* fall back to 3/4 of the stolen space */
> +		size = size * 3 / 4;
> +
> +	return i915_setup_compression(dev, size);
>  }
> 

It seems a bit silly that you traverse the hole list here only to
traverse it again later with drm_mm_search_free. If you know you want
32MB, how about you try for that with search free, and then if that
fails, go for the drm_mm_for_each_hole?

afaict, this slightly differs from the old code in that it doesn't
respect i915_powersave any longer. I have no issue with that, but if
that's truly not an oversight on my part, something in the commit
message might be nice.

You've also changed from 3/4 to 7/8 for no apparent reason. Not great
for bisecting. At least you updated the comment.

 
> -static void i915_cleanup_compression(struct drm_device *dev)
> +void i915_gem_stolen_cleanup_compression(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	drm_mm_put_block(dev_priv->compressed_fb);
> +	if (dev_priv->cfb_size == 0)
> +		return;
> +
> +	if (dev_priv->compressed_fb)
> +		drm_mm_put_block(dev_priv->compressed_fb);
> +
>  	if (dev_priv->compressed_llb)
>  		drm_mm_put_block(dev_priv->compressed_llb);
> +
> +	dev_priv->cfb_size = 0;
>  }
>  
>  void i915_gem_cleanup_stolen(struct drm_device *dev)
>  {
> -	if (I915_HAS_FBC(dev) && i915_powersave)
> -		i915_cleanup_compression(dev);
> +	i915_gem_stolen_cleanup_compression(dev);
>  }
>  

I think we lost the meaning of i915_powersave again here, but I could
be mistaken.

>  int i915_gem_init_stolen(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
>  
>  	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
>  	if (dev_priv->mm.stolen_base == 0)
> @@ -183,21 +200,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
>  
>  	/* Basic memrange allocator for stolen space */
> -	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
> -
> -	/* Try to set up FBC with a reasonable compressed buffer size */
> -	if (I915_HAS_FBC(dev) && i915_powersave) {
> -		int cfb_size;
> -
> -		/* Leave 1M for line length buffer & misc. */
> -
> -		/* Try to get a 32M buffer... */
> -		if (prealloc_size > (36*1024*1024))
> -			cfb_size = 32*1024*1024;
> -		else /* fall back to 7/8 of the stolen space */
> -			cfb_size = prealloc_size * 7 / 8;
> -		i915_setup_compression(dev, cfb_size);
> -	}
> +	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index c2c219b..2ef497d0 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8351,6 +8351,9 @@ void intel_modeset_init(struct drm_device *dev)
>  	/* Just disable it once at startup */
>  	i915_disable_vga(dev);
>  	intel_setup_outputs(dev);
> +
> +	/* Just in case the BIOS is doing something questionable. */
> +	intel_disable_fbc(dev);
>  }
>  
>  static void
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 2b3cddf..9ee53cb 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -440,7 +440,7 @@ void intel_update_fbc(struct drm_device *dev)
>  		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
>  		goto out_disable;
>  	}
> -	if (intel_fb->obj->base.size > dev_priv->cfb_size) {
> +	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
>  		DRM_DEBUG_KMS("framebuffer too large, disabling "
>  			      "compression\n");
>  		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
> @@ -526,6 +526,7 @@ out_disable:
>  		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
>  		intel_disable_fbc(dev);
>  	}
> +	i915_gem_stolen_cleanup_compression(dev);
>  }
>  
>  static void i915_pineview_get_mem_freq(struct drm_device *dev)



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-11-02  8:54         ` Chris Wilson
@ 2012-11-05 13:53           ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 13:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 02 Nov 2012 08:54:39 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Thu, 1 Nov 2012 17:08:20 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Sun, 28 Oct 2012 09:48:35 +0000
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > On Fri, 26 Oct 2012 14:58:45 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > > On Fri, 19 Oct 2012 18:03:09 +0100
> > > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > 
> > > > > A few of the earlier registers where enlarged and so the Base Data of
> > > > > Stolen Memory Register (BDSM) was pushed to 0xb0.
> > > > > 
> > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > 
> > > > This patch seems irrelevant to me. I have a i915_stolen_to_phys which
> > > > already looks correct (git blame shows you last updated it in April).
> > > > 
> > > > Can you help unconfuse me?
> > > 
> > > As the patch suggests the current registers being used by stolen-to-phys
> > > are incorrect for SNB+.
> > > -Chris
> > > 
> > 
> > It looks like all you've done here is fix up the bug you left from
> > patch2.
> > 
> > -- 
> > Ben Widawsky, Intel Open Source Technology Center
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Subject: Re: [Intel-gfx] [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2
> To: Ben Widawsky <ben@bwidawsk.net>
> Cc: intel-gfx@lists.freedesktop.org
> In-Reply-To: <20121101165102.3cc9773c@bwidawsk.net>
> References: <1350666204-8101-1-git-send-email-chris@chris-wilson.co.uk> <1350666204-8101-2-git-send-email-chris@chris-wilson.co.uk> <20121101165102.3cc9773c@bwidawsk.net>
> 
> On Thu, 1 Nov 2012 16:51:02 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:08 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > It was not until the G33 refresh, that a PCI config register was
> > > introduced that explicitly said where the stolen memory was. Prior to
> > > 865G there was not even a register that said where the end of usable
> > > low memory was and where the stolen memory began (or ended depending
> > > upon chipset). Before then, one has to look at the BIOS memory maps to
> > > find the Top of Memory. Alas that is not exported by arch/x86 and so we
> > > have to resort to disabling stolen memory on gen2 for the time being.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h        |    1 +
> > >  drivers/gpu/drm/i915/i915_gem_stolen.c |   69 ++++++++++++++------------------
> > >  2 files changed, 31 insertions(+), 39 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 4728d30..687f379 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -705,6 +705,7 @@ typedef struct drm_i915_private {
> > >  		unsigned long gtt_start;
> > >  		unsigned long gtt_mappable_end;
> > >  		unsigned long gtt_end;
> > > +		unsigned long stolen_base; /* limited to low memory (32-bit) */
> > >  
> > >  		struct io_mapping *gtt_mapping;
> > >  		phys_addr_t gtt_base_addr;
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > index ada2e90..a01ff74 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > > @@ -43,56 +43,43 @@
> > >   * for is a boon.
> > >   */
> > >  
> > > -#define PTE_ADDRESS_MASK		0xfffff000
> > > -#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
> > > -#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
> > > -#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
> > > -#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
> > > -#define PTE_MAPPING_TYPE_MASK		(3 << 1)
> > > -#define PTE_VALID			(1 << 0)
> > > -
> > > -/**
> > > - * i915_stolen_to_phys - take an offset into stolen memory and turn it into
> > > - *                       a physical one
> > > - * @dev: drm device
> > > - * @offset: address to translate
> > > - *
> > > - * Some chip functions require allocations from stolen space and need the
> > > - * physical address of the memory in question.
> > > - */
> > > -static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
> > > +static unsigned long i915_stolen_to_physical(struct drm_device *dev)
> > >  {
> > >  	struct drm_i915_private *dev_priv = dev->dev_private;
> > >  	struct pci_dev *pdev = dev_priv->bridge_dev;
> > >  	u32 base;
> > >  
> > > -#if 0
> > >  	/* On the machines I have tested the Graphics Base of Stolen Memory
> > > -	 * is unreliable, so compute the base by subtracting the stolen memory
> > > -	 * from the Top of Low Usable DRAM which is where the BIOS places
> > > -	 * the graphics stolen memory.
> > > +	 * is unreliable, so on those compute the base by subtracting the
> > > +	 * stolen memory from the Top of Low Usable DRAM which is where the
> > > +	 * BIOS places the graphics stolen memory.
> > > +	 *
> > > +	 * On gen2, the layout is slightly different with the Graphics Segment
> > > +	 * immediately following Top of Memory (or Top of Usable DRAM). Note
> > > +	 * it appears that TOUD is only reported by 865g, so we just use the
> > > +	 * top of memory as determined by the e820 probe.
> > > +	 *
> > > +	 * XXX gen2 requires an unavailable symbol and 945gm fails with
> > > +	 * its value of TOLUD.
> > >  	 */
> > > +	base = 0;
> > >  	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> > > -		/* top 32bits are reserved = 0 */
> > > +		/* Read Graphics Base of Stolen Memory directly */
> > >  		pci_read_config_dword(pdev, 0xA4, &base);
> > > -	} else {
> > > -		/* XXX presume 8xx is the same as i915 */
> > > -		pci_bus_read_config_dword(pdev->bus, 2, 0x5C, &base);
> > > -	}
> > > -#else
> > > -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> > > -		u16 val;
> > > -		pci_read_config_word(pdev, 0xb0, &val);
> > > -		base = val >> 4 << 20;
> > > -	} else {
> > > +#if 0
> > > +	} else if (IS_GEN3(dev)) {
> > >  		u8 val;
> > > +		/* Stolen is immediately below Top of Low Usable DRAM */
> > >  		pci_read_config_byte(pdev, 0x9c, &val);
> > >  		base = val >> 3 << 27;
> > > -	}
> > > -	base -= dev_priv->mm.gtt->stolen_size;
> > > +		base -= dev_priv->mm.gtt->stolen_size;
> > > +	} else {
> > > +		/* Stolen is immediately above Top of Memory */
> > > +		base = max_low_pfn_mapped << PAGE_SHIFT;
> > >  #endif
> > > +	}
> > >  
> > > -	return base + offset;
> > > +	return base;
> > >  }
> > >  
> > 
> > Comments on this below.
> > 
> > >  static void i915_warn_stolen(struct drm_device *dev)
> > > @@ -117,7 +104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> > >  	if (!compressed_fb)
> > >  		goto err;
> > >  
> > > -	cfb_base = i915_stolen_to_phys(dev, compressed_fb->start);
> > > +	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
> > >  	if (!cfb_base)
> > >  		goto err_fb;
> > >  
> > > @@ -130,7 +117,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> > >  		if (!compressed_llb)
> > >  			goto err_fb;
> > >  
> > > -		ll_base = i915_stolen_to_phys(dev, compressed_llb->start);
> > > +		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
> > >  		if (!ll_base)
> > >  			goto err_llb;
> > >  	}
> > > @@ -149,7 +136,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
> > >  	}
> > >  
> > >  	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
> > > -		      cfb_base, ll_base, size >> 20);
> > > +		      (long)cfb_base, (long)ll_base, size >> 20);
> > >  	return;
> > >  
> > >  err_llb:
> > > @@ -181,6 +168,10 @@ int i915_gem_init_stolen(struct drm_device *dev)
> > >  	struct drm_i915_private *dev_priv = dev->dev_private;
> > >  	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
> > >  
> > > +	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
> > > +	if (dev_priv->mm.stolen_base == 0)
> > > +		return 0;
> > > +
> > >  	/* Basic memrange allocator for stolen space */
> > >  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
> > >  
> > 
> > Since I found viewing the diff difficult for this patch, I am going to
> > write the before and after (with the #if 0 blocks removed):
> > 
> > Before:
> >         if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> >                 u16 val;
> >                 pci_read_config_word(pdev, 0xb0, &val);
> >                 base = val >> 4 << 20;
> >         } else {
> >                 u8 val;
> >                 pci_read_config_byte(pdev, 0x9c, &val);
> >                 base = val >> 3 << 27;
> >         }
> >         base -= dev_priv->mm.gtt->stolen_size;
> > 
> > After:
> >         base = 0;
> >         if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> >                 /* Read Graphics Base of Stolen Memory directly */
> >                 pci_read_config_dword(pdev, 0xA4, &base);
> > 
> > 
> > I like that this is a simple helper now to calculate GSM Base. However,
> > I am completely baffled by this patch. The read of 0xA4 did exist
> > before, but was #if 0'd out appears to be the PCI capabilities pointer
> > as of GEN6 (I won't check before that). The before code on the other
> > hand at least looks correct for gen6 (BDSM - GGMS).
> > 
> > This seems like it will definitely break SNB+, which would be bad for
> > bisection.
> 
> The code as it stands is broken on SNB (it only has a semblance of
> working but the address it generates is random and points into active
> system memory), so it seems immaterial as to whether it then remained
> broken after this patch, with the benefit of then having individual
> patches to address the later generations. To allay your fears, this can
> be split into a preparatory patch to allow the routine to fail, and then
> fail until each is fixed.
> -Chris
> 

Please do allay my fears, and I can review it again. However in the
meantime, I disagree. The code seems correct to me as I said above (for
SNB, I haven't back-checked to previous generations). What I am looking
at now tells me that stolen memory base (which is located directly above
the GTT stolen memory) is defined by the 31:20 of offset 0xb0.
Similarly, we know the size of the stolen memory for the gtt ptes from
offset 0x50 (caculated previously), therefore the address certainly
isn't random. If there is a gap between the PTEs and the stolen
memory, then I agree it's somewhat problematic, but it still seems
better than what you changed it to.

Anyway, this is all in reference to patch 2 really since patch 3 fixes
the problem. 



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use
  2012-10-19 17:03 ` [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use Chris Wilson
@ 2012-11-05 15:00   ` Ben Widawsky
  2012-11-05 15:28     ` Chris Wilson
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 15:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:13 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> As FBC is commonly disabled due to limitations of the chipset upon
> output configurations, on many systems FBC is never enabled. For those
> systems, it is advantageous to make use of the stolen memory for other
> objects and so we defer allocation of the FBC chunk until we actually
> require it. This increases the likelihood of that allocation failing,
> which in turns means that we are already taking advantage of the stolen
> memory!

I'm failing to see how this patch is doing what it advertises to do. At
least applies to dinq it's only deferring the error check. None of the
steps that now happen before allocating the stolen compressed fb use
stolen memory. On any of the errors, we seem to free the stolen memory.
I see a mode check, a platform/plane check, a tiling check, a debug
check now happening before we setup compression, but I fail to see how
that really effects anything.... I'm sorry if I am being obtuse, but
could you please explain a bit better?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_pm.c |   13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 9ee53cb..cbf3f38 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -440,12 +440,6 @@ void intel_update_fbc(struct drm_device *dev)
>  		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
>  		goto out_disable;
>  	}
> -	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
> -		DRM_DEBUG_KMS("framebuffer too large, disabling "
> -			      "compression\n");
> -		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
> -		goto out_disable;
> -	}
>  	if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
>  	    (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
>  		DRM_DEBUG_KMS("mode incompatible with compression, "
> @@ -479,6 +473,13 @@ void intel_update_fbc(struct drm_device *dev)
>  	if (in_dbg_master())
>  		goto out_disable;
>  
> +	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
> +		DRM_DEBUG_KMS("framebuffer too large, disabling "
> +			      "compression\n");
> +		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
> +		goto out_disable;
> +	}
> +

While we're moving this... I'd like to see the DRM_DEBUG statement on
one line (I think almost everyone agrees these days that breaking the 80
character limit is acceptable in favor of string grepability).

Also you may as well make intel_fb->obj just obj.

>  	/* If the scanout has not changed, don't modify the FBC settings.
>  	 * Note that we make the fundamental assumption that the fb->obj
>  	 * cannot be unpinned (and have its GTT offset and fence revoked)



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager
  2012-11-05 13:41   ` Ben Widawsky
@ 2012-11-05 15:13     ` Chris Wilson
  0 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-11-05 15:13 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Dave Airlie, intel-gfx

On Mon, 5 Nov 2012 13:41:06 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:11 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > -		BUG_ON(drm_mm_hole_node_start(node)
> > -				!= drm_mm_hole_node_end(node));
> > +		BUG_ON(node->start + node->size !=
> > +		       list_entry(node->node_list.next,
> > +				  struct drm_mm_node, node_list)->start);
> > +
> > 
> 
> This seems harder to read to me than before. I suspect you've changed it
> only because you've moved the inline functions down, but I also don't
> understand the reason for that.

Since I move the BUG_ON() into drm_mm_hole_node_end() and this is the
atypical caller, I chose to make this code less clear in order to make
the other callsites clearer. The other choice is to use
__drm_mm_hole_node_end(), which is probably worthwhile.

> > +
> > +/* Note that we need to unroll list_for_each_entry in order to inline
> > + * setting hole_start and hole_end on each iteration and keep the
> > + * macro sane.
> > + */
> 
> This comment is probably better suited for the commit message then here
> where it'd require git blame to make any sense of it.

No, I think list_for_each_entry() is a common macro that the reader
might have expected to see and so we need to explain why we cannot use
it here.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC
  2012-11-05 13:44   ` Ben Widawsky
@ 2012-11-05 15:24     ` Chris Wilson
  0 siblings, 0 replies; 51+ messages in thread
From: Chris Wilson @ 2012-11-05 15:24 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 5 Nov 2012 13:44:13 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> > -static void i915_setup_compression(struct drm_device *dev, int size)
> > +static int i915_setup_compression(struct drm_device *dev, int size)
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
> > -	unsigned long cfb_base;
> > -	unsigned long ll_base = 0;
> >  
> > -	/* Just in case the BIOS is doing something questionable. */
> > -	intel_disable_fbc(dev);
> > +	DRM_DEBUG_KMS("reserving %d bytes of contiguous stolen space for FBC\n",
> > +		      size);
> >  
> 
> I see you've moved this to modeset init. I question whether we even need
> it at all.

I presume you mean the FBC disable? We should never need it, just like
we should never need any of the other BIOS sanitizers.

> > +int i915_gem_stolen_setup_compression(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct drm_mm_node *node;
> > +	unsigned long hole_start, hole_end, size;
> > +
> > +	if (dev_priv->mm.stolen_base == 0)
> > +		return 0;
> > +
> > +	if (dev_priv->cfb_size)
> > +		return dev_priv->cfb_size;
> > +
> > +	/* Try to set up FBC with a reasonable compressed buffer size */
> > +	size = 0;
> > +	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
> > +		unsigned long hole_size = hole_end - hole_start;
> > +		if (hole_size > size)
> > +			size = hole_size;
> > +	}
> > +
> > +	/* Try to get a 32M buffer... */
> > +	if (size > (36*1024*1024))
> > +		size = 32*1024*1024;
> > +	else /* fall back to 3/4 of the stolen space */
> > +		size = size * 3 / 4;
> > +
> > +	return i915_setup_compression(dev, size);
> >  }
> > 
> 
> It seems a bit silly that you traverse the hole list here only to
> traverse it again later with drm_mm_search_free. If you know you want
> 32MB, how about you try for that with search free, and then if that
> fails, go for the drm_mm_for_each_hole?

It was to try to preserve the flavour of the original code. If we felt
adventurous could we actively manage the stolen space (i.e.
evict/defragment).
 
> afaict, this slightly differs from the old code in that it doesn't
> respect i915_powersave any longer. I have no issue with that, but if
> that's truly not an oversight on my part, something in the commit
> message might be nice.

No, we should simply never try to allocate stolen space if
i915_powersave=0 and so never need to setup the compression buffer now.
 
> You've also changed from 3/4 to 7/8 for no apparent reason. Not great
> for bisecting. At least you updated the comment.

Ah, just because we are trying to use stolen space for other things than
FBC. More or less irrevelant until we enable FBC and care about the
various tradeoffs.

> >  void i915_gem_cleanup_stolen(struct drm_device *dev)
> >  {
> > -	if (I915_HAS_FBC(dev) && i915_powersave)
> > -		i915_cleanup_compression(dev);
> > +	i915_gem_stolen_cleanup_compression(dev);
> >  }
> >  
> 
> I think we lost the meaning of i915_powersave again here, but I could
> be mistaken.

Having allocated it, we always need to free it. We should not be
allocating if i915_powersave=0 anyway.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use
  2012-11-05 15:00   ` Ben Widawsky
@ 2012-11-05 15:28     ` Chris Wilson
  2012-11-05 15:32       ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-11-05 15:28 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 5 Nov 2012 15:00:36 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:13 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > As FBC is commonly disabled due to limitations of the chipset upon
> > output configurations, on many systems FBC is never enabled. For those
> > systems, it is advantageous to make use of the stolen memory for other
> > objects and so we defer allocation of the FBC chunk until we actually
> > require it. This increases the likelihood of that allocation failing,
> > which in turns means that we are already taking advantage of the stolen
> > memory!
> 
> I'm failing to see how this patch is doing what it advertises to do. At
> least applies to dinq it's only deferring the error check. None of the
> steps that now happen before allocating the stolen compressed fb use
> stolen memory. On any of the errors, we seem to free the stolen memory.
> I see a mode check, a platform/plane check, a tiling check, a debug
> check now happening before we setup compression, but I fail to see how
> that really effects anything.... I'm sorry if I am being obtuse, but
> could you please explain a bit better?

All of those previous checks are more likely to be false - and
previously we never tried to recover the stolen memory. I can go back to
a single patch as this is now just an optimisation rather than
preventing a permanent loss of memory.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use
  2012-11-05 15:28     ` Chris Wilson
@ 2012-11-05 15:32       ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 15:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, 05 Nov 2012 15:28:42 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Mon, 5 Nov 2012 15:00:36 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:13 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > As FBC is commonly disabled due to limitations of the chipset upon
> > > output configurations, on many systems FBC is never enabled. For those
> > > systems, it is advantageous to make use of the stolen memory for other
> > > objects and so we defer allocation of the FBC chunk until we actually
> > > require it. This increases the likelihood of that allocation failing,
> > > which in turns means that we are already taking advantage of the stolen
> > > memory!
> > 
> > I'm failing to see how this patch is doing what it advertises to do. At
> > least applies to dinq it's only deferring the error check. None of the
> > steps that now happen before allocating the stolen compressed fb use
> > stolen memory. On any of the errors, we seem to free the stolen memory.
> > I see a mode check, a platform/plane check, a tiling check, a debug
> > check now happening before we setup compression, but I fail to see how
> > that really effects anything.... I'm sorry if I am being obtuse, but
> > could you please explain a bit better?
> 
> All of those previous checks are more likely to be false - and
> previously we never tried to recover the stolen memory. I can go back to
> a single patch as this is now just an optimisation rather than
> preventing a permanent loss of memory.
> -Chris
> 

On the basis that all the other checks are more likely to be false, it
is:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

if you want to keep it. Maybe update the commit message if you feel like
it.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 10/18] drm/i915: Support readback of stolen objects upon error
  2012-10-19 17:03 ` [PATCH 10/18] drm/i915: Support readback of stolen objects upon error Chris Wilson
@ 2012-11-05 15:41   ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 15:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:16 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_irq.c |    8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d07c787..df3c10a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -924,6 +924,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  						     reloc_offset);
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
> +		} else if (src->stolen) {
> +			unsigned long offset;
> +
> +			offset = dev_priv->mm.stolen_base;
> +			offset += src->stolen->start;
> +			offset += i << PAGE_SHIFT;
> +
> +			memcpy_fromio(d, (void *)offset, PAGE_SIZE);
>  		} else {
>  			struct page *page;
>  			void *s;



Everything up to and including this is so far:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-10-19 17:03 ` [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
@ 2012-11-05 16:32   ` Ben Widawsky
  2012-11-05 16:59     ` Chris Wilson
  0 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 16:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:19 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Allow for the creation of GEM objects backed by stolen memory. As these
> are not backed by ordinary pages, we create a fake dma mapping and store
> the address in the scatterlist rather than obj->pages.
> 
> v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse
> Barnes.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Deferring on an r-b for now until I understand the point of most of this
patch.

Some comments below however.

> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    3 +
>  drivers/gpu/drm/i915/i915_gem.c        |    1 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c |  122 ++++++++++++++++++++++++++++++++
>  3 files changed, 126 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6b893c7..5a0e0cd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1526,6 +1526,9 @@ int i915_gem_init_stolen(struct drm_device *dev);
>  int i915_gem_stolen_setup_compression(struct drm_device *dev);
>  void i915_gem_stolen_cleanup_compression(struct drm_device *dev);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
> +struct drm_i915_gem_object *
> +i915_gem_object_create_stolen(struct drm_device *dev, u32 size);
> +void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
>  
>  /* i915_gem_tiling.c */
>  void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d75c5c3..0349899 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3857,6 +3857,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	obj->pages_pin_count = 0;
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
> +	i915_gem_object_release_stolen(obj);
>  
>  	BUG_ON(obj->pages);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 68ef22a..86c4af4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -204,3 +204,125 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  
>  	return 0;
>  }
> +
> +static struct sg_table *
> +i915_pages_create_for_stolen(struct drm_device *dev,
> +			     u32 offset, u32 size)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +

BUG_ON(offset + size <= dev_priv->mm.gtt->stolen_size);

> +	/* We hide that we have no struct page backing our stolen object
> +	 * by wrapping the contiguous physical allocation with a fake
> +	 * dma mapping in a single scatterlist.
> +	 */
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return NULL;
> +
> +	if (!sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return NULL;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = offset;
> +	sg->length = size;
> +
> +	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
> +	sg_dma_len(sg) = size;
> +

Do we want to make stolen_base a dma_addr_t (or at least typecast it)?

> +	return st;
> +}
> +
> +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
> +{
> +	BUG();
> +	return -EINVAL;
> +}
> +

__noreturn, or maybe just make .get_pages = NULL, and do the check in
the upper layer get_pages?

> +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
> +{
> +	/* Should only be called during free */
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
> +	.get_pages = i915_gem_object_get_pages_stolen,
> +	.put_pages = i915_gem_object_put_pages_stolen,
> +};
> +
> +static struct drm_i915_gem_object *
> +_i915_gem_object_create_stolen(struct drm_device *dev,
> +			       struct drm_mm_node *stolen)
> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	if (obj == NULL)
> +		return NULL;
> +
> +	if (drm_gem_private_object_init(dev, &obj->base, stolen->size))
> +		goto cleanup;
> +
> +	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
> +
> +	obj->pages = i915_pages_create_for_stolen(dev,
> +						  stolen->start, stolen->size);
> +	if (obj->pages == NULL)
> +		goto cleanup;
> +
> +	obj->has_dma_mapping = true;
> +	obj->pages_pin_count = 1;
> +	obj->stolen = stolen;
> +
> +	obj->base.write_domain = I915_GEM_DOMAIN_GTT;
> +	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
> +	obj->cache_level = I915_CACHE_NONE;
> +
> +	return obj;
> +
> +cleanup:
> +	kfree(obj);
> +	return NULL;
> +}
> +
> +struct drm_i915_gem_object *
> +i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj;
> +	struct drm_mm_node *stolen;
> +
> +	if (dev_priv->mm.stolen_base == 0)
> +		return 0;
> +
> +	DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
> +	if (size == 0)
> +		return NULL;
> +
> +	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
> +	if (stolen)
> +		stolen = drm_mm_get_block(stolen, size, 4096);
> +	if (stolen == NULL)
> +		return NULL;

Could probably do slightly better here with ERR_PTR(-ENOMEM) but since
we don't do that elsewhere, I guess it doesn't matter.

> +
> +	obj = _i915_gem_object_create_stolen(dev, stolen);
> +	if (obj)
> +		return obj;
> +
> +	drm_mm_put_block(stolen);
> +	return NULL;

Similarly here maybe use PTR_ERR(obj) and return something meaningful in
_i915_gem_object_create_stolen.

Since just about everything is more or less, ENOMEM, maybe it doesn't
matter.
> +}
> +
> +void
> +i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
> +{
> +	if (obj->stolen) {
> +		drm_mm_put_block(obj->stolen);
> +		obj->stolen = NULL;
> +	}
> +}



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-11-05 16:32   ` Ben Widawsky
@ 2012-11-05 16:59     ` Chris Wilson
  2012-11-05 17:34       ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-11-05 16:59 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 5 Nov 2012 16:32:26 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:19 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > Allow for the creation of GEM objects backed by stolen memory. As these
> > are not backed by ordinary pages, we create a fake dma mapping and store
> > the address in the scatterlist rather than obj->pages.
> > 
> > v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse
> > Barnes.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Deferring on an r-b for now until I understand the point of most of this
> patch.

The stolen support is a precursor for fastboot, where we need to wrap
the allocations made by the BIOS from the stolen memory and reuse that
for our own framebuffers.
 
> > +	struct scatterlist *sg;
> > +
> 
> BUG_ON(offset + size <= dev_priv->mm.gtt->stolen_size);

Done with a minor amendment.
 
> > +	/* We hide that we have no struct page backing our stolen object
> > +	 * by wrapping the contiguous physical allocation with a fake
> > +	 * dma mapping in a single scatterlist.
> > +	 */
> > +
> > +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +	if (st == NULL)
> > +		return NULL;
> > +
> > +	if (!sg_alloc_table(st, 1, GFP_KERNEL)) {
Fixed.

> > +		kfree(st);
> > +		return NULL;
> > +	}
> > +
> > +	sg = st->sgl;
> > +	sg->offset = offset;
> > +	sg->length = size;
> > +
> > +	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
> > +	sg_dma_len(sg) = size;
> > +
> 
> Do we want to make stolen_base a dma_addr_t (or at least typecast it)?

Interesting enough, the current FBC registers are limited to only using
32bit addresses, so stolen_base atm is not technically a dma_addr_t.
Maybe I'm picking hairs. :)
 
> > +	return st;
> > +}
> > +
> > +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
> > +{
> > +	BUG();
> > +	return -EINVAL;
> > +}
> > +
> 
> __noreturn, or maybe just make .get_pages = NULL, and do the check in
> the upper layer get_pages?

I refer you to http://lwn.net/Articles/336262/ where the argument is put
forth that default no-op functions are preferrable in most cases to
interpretting special NULL vfuncs. We have adopted this elsewhere in
i915.ko to good effect.

> > +	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
> > +	if (stolen)
> > +		stolen = drm_mm_get_block(stolen, size, 4096);
> > +	if (stolen == NULL)
> > +		return NULL;
> 
> Could probably do slightly better here with ERR_PTR(-ENOMEM) but since
> we don't do that elsewhere, I guess it doesn't matter.

I was tempted - it would have just looked odd as being the only create
routine to do so. :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-11-05 16:59     ` Chris Wilson
@ 2012-11-05 17:34       ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 17:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, 05 Nov 2012 16:59:44 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Mon, 5 Nov 2012 16:32:26 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:19 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > Allow for the creation of GEM objects backed by stolen memory. As these
> > > are not backed by ordinary pages, we create a fake dma mapping and store
> > > the address in the scatterlist rather than obj->pages.
> > > 
> > > v2: Mark _i915_gem_object_create_stolen() as static, as noticed by Jesse
> > > Barnes.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > Deferring on an r-b for now until I understand the point of most of this
> > patch.
> 
> The stolen support is a precursor for fastboot, where we need to wrap
> the allocations made by the BIOS from the stolen memory and reuse that
> for our own framebuffers.
>  

For some reason I thought this should be simpler than it is, but Jesse
has successfully convinced me otherwise.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> > > +	struct scatterlist *sg;
> > > +
> > 
> > BUG_ON(offset + size <= dev_priv->mm.gtt->stolen_size);
> 
> Done with a minor amendment.
>  
> > > +	/* We hide that we have no struct page backing our stolen object
> > > +	 * by wrapping the contiguous physical allocation with a fake
> > > +	 * dma mapping in a single scatterlist.
> > > +	 */
> > > +
> > > +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> > > +	if (st == NULL)
> > > +		return NULL;
> > > +
> > > +	if (!sg_alloc_table(st, 1, GFP_KERNEL)) {
> Fixed.
> 
> > > +		kfree(st);
> > > +		return NULL;
> > > +	}
> > > +
> > > +	sg = st->sgl;
> > > +	sg->offset = offset;
> > > +	sg->length = size;
> > > +
> > > +	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
> > > +	sg_dma_len(sg) = size;
> > > +
> > 
> > Do we want to make stolen_base a dma_addr_t (or at least typecast it)?
> 
> Interesting enough, the current FBC registers are limited to only using
> 32bit addresses, so stolen_base atm is not technically a dma_addr_t.
> Maybe I'm picking hairs. :)
>  
> > > +	return st;
> > > +}
> > > +
> > > +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
> > > +{
> > > +	BUG();
> > > +	return -EINVAL;
> > > +}
> > > +
> > 
> > __noreturn, or maybe just make .get_pages = NULL, and do the check in
> > the upper layer get_pages?
> 
> I refer you to http://lwn.net/Articles/336262/ where the argument is put
> forth that default no-op functions are preferrable in most cases to
> interpretting special NULL vfuncs. We have adopted this elsewhere in
> i915.ko to good effect.
> 
> > > +	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
> > > +	if (stolen)
> > > +		stolen = drm_mm_get_block(stolen, size, 4096);
> > > +	if (stolen == NULL)
> > > +		return NULL;
> > 
> > Could probably do slightly better here with ERR_PTR(-ENOMEM) but since
> > we don't do that elsewhere, I guess it doesn't matter.
> 
> I was tempted - it would have just looked odd as being the only create
> routine to do so. :)
> -Chris
> 



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 16/18] drm/i915: Allocate overlay registers from stolen memory
  2012-10-19 17:03 ` [PATCH 16/18] drm/i915: Allocate overlay registers " Chris Wilson
@ 2012-11-05 17:39   ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 17:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:22 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

14-16 are:
Acked-by: Ben Widawsky <ben@bwidawsk.net>

I think it's a nice way to prove the existing code works, but am a
little scared of putting something like the ringbuffer in stolen memory.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
  2012-10-24 20:21   ` Paulo Zanoni
@ 2012-11-05 17:49   ` Ben Widawsky
  2012-11-05 20:57     ` Chris Wilson
  2012-11-05 18:07   ` Ben Widawsky
  2 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 17:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:23 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
> 
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

So I previously acked this patch, but you dropped that, so now you get
more words.

Why not go all the way and move all allocations we do in i915 to this
slab (adding a flag with whether or not to zero the memory first)? While
I agree in terms of space, nothing takes up as much as objects, I think
it would be nice to do this so we can totally track what our driver is
doing. In fact I would suggest this is something which core drm should
provide to all of the drivers (we already use drm_alloc calls in some
places).

Anyway, it's still:
Acked-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	destroy_workqueue(dev_priv->wq);
>  
> +	if (dev_priv->slab)
> +		kmem_cache_destroy(dev_priv->slab);
> +
>  	pci_dev_put(dev_priv->bridge_dev);
>  	kfree(dev->dev_private);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>  
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
> +	struct kmem_cache *slab;
>  
>  	const struct intel_device_info *info;
>  
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>  		struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>  	if (ret) {
>  		drm_gem_object_release(&obj->base);
>  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return ret;
>  	}
>  
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	struct address_space *mapping;
>  	u32 mask;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
>  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return NULL;
>  	}
>  
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
>  	kfree(obj->bit_17);
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  }
>  
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -	int i;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int i;
> +
> +	dev_priv->slab =
> +		kmem_cache_create("i915_gem_object",
> +				  sizeof(struct drm_i915_gem_object), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		goto fail_detach;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>  	struct drm_i915_gem_object *obj;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	return obj;
>  
>  cleanup:
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  	return NULL;
>  }
>  



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
  2012-10-24 20:21   ` Paulo Zanoni
  2012-11-05 17:49   ` Ben Widawsky
@ 2012-11-05 18:07   ` Ben Widawsky
  2012-11-05 18:10     ` Ben Widawsky
  2 siblings, 1 reply; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 18:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, 19 Oct 2012 18:03:23 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
> 
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

To sum up:
patches 5-11 are Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
patch 13 is Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
patch 14-17 is Acked-by: Ben Widawsky <ben@bwidawsk.net>

patches 1-17 are Tested-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	destroy_workqueue(dev_priv->wq);
>  
> +	if (dev_priv->slab)
> +		kmem_cache_destroy(dev_priv->slab);
> +
>  	pci_dev_put(dev_priv->bridge_dev);
>  	kfree(dev->dev_private);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>  
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
> +	struct kmem_cache *slab;
>  
>  	const struct intel_device_info *info;
>  
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>  		struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>  	if (ret) {
>  		drm_gem_object_release(&obj->base);
>  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return ret;
>  	}
>  
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	struct address_space *mapping;
>  	u32 mask;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
>  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return NULL;
>  	}
>  
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
>  	kfree(obj->bit_17);
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  }
>  
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -	int i;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int i;
> +
> +	dev_priv->slab =
> +		kmem_cache_create("i915_gem_object",
> +				  sizeof(struct drm_i915_gem_object), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		goto fail_detach;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>  	struct drm_i915_gem_object *obj;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	return obj;
>  
>  cleanup:
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  	return NULL;
>  }
>  



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-11-05 18:07   ` Ben Widawsky
@ 2012-11-05 18:10     ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-05 18:10 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 5 Nov 2012 18:07:53 +0000
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Fri, 19 Oct 2012 18:03:23 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > The primary purpose of this was to debug some use-after-free memory
> > corruption that was causing an OOPS inside drm/i915. As it turned out
> > the corruption was being caused elsewhere and i915.ko as a major user of
> > many objects was being hit hardest.
> > 
> > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > ourselves (or at least naming one for us depending upon the core) aids
> > debugging our own slab usage.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> To sum up:
> patches 5-11 are Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
oops, 11 is not reviewed-by me. 11 and 12 are meh IMO.

> patch 13 is Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> patch 14-17 is Acked-by: Ben Widawsky <ben@bwidawsk.net>
> 
> patches 1-17 are Tested-by: Ben Widawsky <ben@bwidawsk.net>
> 
> > ---
> >  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
> >  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
> >  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
> >  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
> >  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
> >  5 files changed, 34 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 14271aa..69969be 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
> >  
> >  	destroy_workqueue(dev_priv->wq);
> >  
> > +	if (dev_priv->slab)
> > +		kmem_cache_destroy(dev_priv->slab);
> > +
> >  	pci_dev_put(dev_priv->bridge_dev);
> >  	kfree(dev->dev_private);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5a0e0cd..5084b29 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -398,6 +398,7 @@ struct intel_gmbus {
> >  
> >  typedef struct drm_i915_private {
> >  	struct drm_device *dev;
> > +	struct kmem_cache *slab;
> >  
> >  	const struct intel_device_info *info;
> >  
> > @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> >  			struct drm_file *file_priv);
> >  void i915_gem_load(struct drm_device *dev);
> > +void *i915_gem_object_alloc(struct drm_device *dev);
> > +void i915_gem_object_free(struct drm_i915_gem_object *obj);
> >  int i915_gem_init_object(struct drm_gem_object *obj);
> >  void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >  			 const struct drm_i915_gem_object_ops *ops);
> >  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >  						  size_t size);
> >  void i915_gem_free_object(struct drm_gem_object *obj);
> > +
> >  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
> >  				     uint32_t alignment,
> >  				     bool map_and_fenceable,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 0349899..8183c0f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >  	return 0;
> >  }
> >  
> > +void *i915_gem_object_alloc(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> > +}
> > +
> > +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> > +{
> > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > +	kmem_cache_free(dev_priv->slab, obj);
> > +}
> > +
> >  static int
> >  i915_gem_create(struct drm_file *file,
> >  		struct drm_device *dev,
> > @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
> >  	if (ret) {
> >  		drm_gem_object_release(&obj->base);
> >  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		return ret;
> >  	}
> >  
> > @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >  	struct address_space *mapping;
> >  	u32 mask;
> >  
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL)
> >  		return NULL;
> >  
> >  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		return NULL;
> >  	}
> >  
> > @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
> >  
> >  	kfree(obj->bit_17);
> > -	kfree(obj);
> > +	i915_gem_object_free(obj);
> >  }
> >  
> >  int
> > @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
> >  void
> >  i915_gem_load(struct drm_device *dev)
> >  {
> > -	int i;
> >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > +	int i;
> > +
> > +	dev_priv->slab =
> > +		kmem_cache_create("i915_gem_object",
> > +				  sizeof(struct drm_i915_gem_object), 0,
> > +				  SLAB_HWCACHE_ALIGN,
> > +				  NULL);
> >  
> >  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > index ca3497e..f307e31 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> >  	if (IS_ERR(attach))
> >  		return ERR_CAST(attach);
> >  
> > -
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL) {
> >  		ret = -ENOMEM;
> >  		goto fail_detach;
> > @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> >  
> >  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
> >  	if (ret) {
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		goto fail_detach;
> >  	}
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > index 86c4af4..d7cfa71 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  {
> >  	struct drm_i915_gem_object *obj;
> >  
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL)
> >  		return NULL;
> >  
> > @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  	return obj;
> >  
> >  cleanup:
> > -	kfree(obj);
> > +	i915_gem_object_free(obj);
> >  	return NULL;
> >  }
> >  
> 
> 
> 



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-11-05 17:49   ` Ben Widawsky
@ 2012-11-05 20:57     ` Chris Wilson
  2012-11-07 13:59       ` Ben Widawsky
  0 siblings, 1 reply; 51+ messages in thread
From: Chris Wilson @ 2012-11-05 20:57 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Mon, 5 Nov 2012 17:49:35 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:23 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > The primary purpose of this was to debug some use-after-free memory
> > corruption that was causing an OOPS inside drm/i915. As it turned out
> > the corruption was being caused elsewhere and i915.ko as a major user of
> > many objects was being hit hardest.
> > 
> > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > ourselves (or at least naming one for us depending upon the core) aids
> > debugging our own slab usage.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> So I previously acked this patch, but you dropped that, so now you get
> more words.
> 
> Why not go all the way and move all allocations we do in i915 to this
> slab (adding a flag with whether or not to zero the memory first)? While
> I agree in terms of space, nothing takes up as much as objects, I think
> it would be nice to do this so we can totally track what our driver is
> doing. In fact I would suggest this is something which core drm should
> provide to all of the drivers (we already use drm_alloc calls in some
> places).

I thought the slab cache was a fixed size, so presumably you mean
introduce a slab per object we allocate? Of which the only other
interesting one is the requests. And we don't tend to have many requests
pending at any one time (outside of exceptional error cases), unlike the
thousands of bo that tend to be allocated. So is it worth dedicating a
separate slab to the few but frequently allocated requests?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 17/18] drm/i915: Use a slab for object allocation
  2012-11-05 20:57     ` Chris Wilson
@ 2012-11-07 13:59       ` Ben Widawsky
  0 siblings, 0 replies; 51+ messages in thread
From: Ben Widawsky @ 2012-11-07 13:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, 05 Nov 2012 20:57:05 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Mon, 5 Nov 2012 17:49:35 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:23 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > The primary purpose of this was to debug some use-after-free memory
> > > corruption that was causing an OOPS inside drm/i915. As it turned out
> > > the corruption was being caused elsewhere and i915.ko as a major user of
> > > many objects was being hit hardest.
> > > 
> > > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > > ourselves (or at least naming one for us depending upon the core) aids
> > > debugging our own slab usage.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > So I previously acked this patch, but you dropped that, so now you get
> > more words.
> > 
> > Why not go all the way and move all allocations we do in i915 to this
> > slab (adding a flag with whether or not to zero the memory first)? While
> > I agree in terms of space, nothing takes up as much as objects, I think
> > it would be nice to do this so we can totally track what our driver is
> > doing. In fact I would suggest this is something which core drm should
> > provide to all of the drivers (we already use drm_alloc calls in some
> > places).
> 
> I thought the slab cache was a fixed size, so presumably you mean
> introduce a slab per object we allocate? Of which the only other
> interesting one is the requests. And we don't tend to have many requests
> pending at any one time (outside of exceptional error cases), unlike the
> thousands of bo that tend to be allocated. So is it worth dedicating a
> separate slab to the few but frequently allocated requests?
> -Chris
> 

I guess I wasn't really thinking about the details. I knew in the back
of my mind that the slab cache was a fixed size, but was thinking we
could have a generically large enough object for most of our other
things (I think they're mostly small except for dev_priv). But, now
thinking about it a bit, I guess that wastes the memory we're trying to
scrutinize, so it's probably silly.

In any case, this made me review kmem_cache_create a bit, so we can bump
this patch to:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2012-11-07 13:59 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-19 17:03 [PATCH 01/18] drm: Introduce drm_mm_create_block() Chris Wilson
2012-10-19 17:03 ` [PATCH 02/18] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
2012-11-01 23:51   ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 03/18] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
2012-10-26 21:58   ` Ben Widawsky
2012-10-28  9:48     ` Chris Wilson
2012-10-28 17:52       ` Ben Widawsky
2012-11-02  0:08       ` Ben Widawsky
2012-11-02  8:54         ` Chris Wilson
2012-11-05 13:53           ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 04/18] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
2012-10-26 22:22   ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 05/18] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
2012-11-05 13:41   ` Ben Widawsky
2012-11-05 15:13     ` Chris Wilson
2012-10-19 17:03 ` [PATCH 06/18] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
2012-11-05 13:44   ` Ben Widawsky
2012-11-05 15:24     ` Chris Wilson
2012-10-19 17:03 ` [PATCH 07/18] drm/i915: Defer allocation of stolen memory for FBC until actual first use Chris Wilson
2012-11-05 15:00   ` Ben Widawsky
2012-11-05 15:28     ` Chris Wilson
2012-11-05 15:32       ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 08/18] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
2012-10-19 17:03 ` [PATCH 09/18] drm/i915: Differentiate between prime and stolen objects Chris Wilson
2012-10-19 17:03 ` [PATCH 10/18] drm/i915: Support readback of stolen objects upon error Chris Wilson
2012-11-05 15:41   ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 11/18] drm/i915: Handle stolen objects in pwrite Chris Wilson
2012-10-19 17:03 ` [PATCH 12/18] drm/i915: Handle stolen objects for pread Chris Wilson
2012-10-19 17:03 ` [PATCH 13/18] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
2012-11-05 16:32   ` Ben Widawsky
2012-11-05 16:59     ` Chris Wilson
2012-11-05 17:34       ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 14/18] drm/i915: Allocate fbcon from stolen memory Chris Wilson
2012-10-19 17:03 ` [PATCH 15/18] drm/i915: Allocate ringbuffers " Chris Wilson
2012-10-19 17:03 ` [PATCH 16/18] drm/i915: Allocate overlay registers " Chris Wilson
2012-11-05 17:39   ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 17/18] drm/i915: Use a slab for object allocation Chris Wilson
2012-10-24 20:21   ` Paulo Zanoni
2012-11-05 17:49   ` Ben Widawsky
2012-11-05 20:57     ` Chris Wilson
2012-11-07 13:59       ` Ben Widawsky
2012-11-05 18:07   ` Ben Widawsky
2012-11-05 18:10     ` Ben Widawsky
2012-10-19 17:03 ` [PATCH 18/18] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
2012-10-22 22:21   ` Jesse Barnes
2012-10-23  7:20     ` Daniel Vetter
2012-10-23 17:50       ` Eric Anholt
2012-10-26 21:47 ` [PATCH 01/18] drm: Introduce drm_mm_create_block() Ben Widawsky
2012-10-28  9:57   ` Chris Wilson
2012-10-28 18:12     ` Ben Widawsky
2012-10-28 18:14       ` Ben Widawsky

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.