* Stolen memory, again.
@ 2012-09-04 20:02 Chris Wilson
  2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
                   ` (23 more replies)
  0 siblings, 24 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

This addresses all the comments I've received so far, except for
handling pwrite/pread through additional obj->ops; there I think the
cure was worse than the disease, due to the fiddling required for the
sg iterator and for tracking individual struct page operations.
-Chris


* [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops
  2012-09-04 20:02 Stolen memory, again Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-06 22:32   ` Ben Widawsky
  2012-10-11 18:28   ` Jesse Barnes
  2012-09-04 20:02 ` [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap Chris Wilson
                   ` (22 subsequent siblings)
  23 siblings, 2 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

In order to specialise functions depending upon the type of object, we
can attach vfuncs to each object via a new ->ops pointer.

For instance, this will be used in future patches to bind pages from a
dma-buf only for the duration that the object is used by the GPU, and
so avoid pinning those pages for the object's entire lifetime.
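
In outline this is the usual constant-vtable pattern. A minimal sketch
(the get_pages/put_pages hooks match the patch below; the caller is
purely illustrative):

struct drm_i915_gem_object_ops {
	int (*get_pages)(struct drm_i915_gem_object *);
	void (*put_pages)(struct drm_i915_gem_object *);
};

/* Core code dispatches through obj->ops rather than calling the
 * shmemfs implementation directly, so other backing storage
 * (dma-buf, stolen memory) can substitute its own handlers.
 */
static int example_acquire_backing(struct drm_i915_gem_object *obj)
{
	return obj->ops->get_pages(obj);
}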

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |   12 +++++-
 drivers/gpu/drm/i915/i915_gem.c        |   71 +++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |    4 +-
 3 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f16ab5e..f180874 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -898,9 +898,16 @@ enum i915_cache_level {
 	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
 };
 
+struct drm_i915_gem_object_ops {
+	int (*get_pages)(struct drm_i915_gem_object *);
+	void (*put_pages)(struct drm_i915_gem_object *);
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
+	const struct drm_i915_gem_object_ops *ops;
+
 	/** Current space allocated to this object in the GTT, if any. */
 	struct drm_mm_node *gtt_space;
 	struct list_head gtt_list;
@@ -1305,7 +1312,8 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
 int i915_gem_init_object(struct drm_gem_object *obj);
-void i915_gem_object_init(struct drm_i915_gem_object *obj);
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
@@ -1318,7 +1326,7 @@ int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
-int __must_check i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 87a64e5..66fbd9f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1650,18 +1650,12 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 	return obj->madv == I915_MADV_DONTNEED;
 }
 
-static int
+static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
 	int page_count = obj->base.size / PAGE_SIZE;
 	int ret, i;
 
-	BUG_ON(obj->gtt_space);
-
-	if (obj->pages == NULL)
-		return 0;
-
-	BUG_ON(obj->gtt_space);
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
@@ -1693,9 +1687,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 
 	drm_free_large(obj->pages);
 	obj->pages = NULL;
+}
 
-	list_del(&obj->gtt_list);
+static int
+i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+{
+	const struct drm_i915_gem_object_ops *ops = obj->ops;
+
+	if (obj->sg_table || obj->pages == NULL)
+		return 0;
+
+	BUG_ON(obj->gtt_space);
 
+	ops->put_pages(obj);
+
+	list_del(&obj->gtt_list);
 	if (i915_gem_object_is_purgeable(obj))
 		i915_gem_object_truncate(obj);
 
@@ -1712,7 +1718,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
 				 &dev_priv->mm.unbound_list,
 				 gtt_list) {
 		if (i915_gem_object_is_purgeable(obj) &&
-		    i915_gem_object_put_pages_gtt(obj) == 0) {
+		    i915_gem_object_put_pages(obj) == 0) {
 			count += obj->base.size >> PAGE_SHIFT;
 			if (count >= target)
 				return count;
@@ -1724,7 +1730,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
 				 mm_list) {
 		if (i915_gem_object_is_purgeable(obj) &&
 		    i915_gem_object_unbind(obj) == 0 &&
-		    i915_gem_object_put_pages_gtt(obj) == 0) {
+		    i915_gem_object_put_pages(obj) == 0) {
 			count += obj->base.size >> PAGE_SHIFT;
 			if (count >= target)
 				return count;
@@ -1742,10 +1748,10 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 	i915_gem_evict_everything(dev_priv->dev);
 
 	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
-		i915_gem_object_put_pages_gtt(obj);
+		i915_gem_object_put_pages(obj);
 }
 
-int
+static int
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -1754,9 +1760,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct page *page;
 	gfp_t gfp;
 
-	if (obj->pages || obj->sg_table)
-		return 0;
-
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
 	 * a GPU cache
@@ -1806,7 +1809,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj);
 
-	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
 	return 0;
 
 err_pages:
@@ -1818,6 +1820,24 @@ err_pages:
 	return PTR_ERR(page);
 }
 
+int
+i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	const struct drm_i915_gem_object_ops *ops = obj->ops;
+	int ret;
+
+	if (obj->sg_table || obj->pages)
+		return 0;
+
+	ret = ops->get_pages(obj);
+	if (ret)
+		return ret;
+
+	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
+	return 0;
+}
+
 void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 			       struct intel_ring_buffer *ring,
@@ -2071,7 +2091,6 @@ void i915_gem_reset(struct drm_device *dev)
 		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
 	}
 
-
 	/* The fence registers are invalidated so clear them out */
 	i915_gem_reset_fences(dev);
 }
@@ -2871,7 +2890,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -E2BIG;
 	}
 
-	ret = i915_gem_object_get_pages_gtt(obj);
+	ret = i915_gem_object_get_pages(obj);
 	if (ret)
 		return ret;
 
@@ -3610,15 +3629,16 @@ unlock:
 	return ret;
 }
 
-void i915_gem_object_init(struct drm_i915_gem_object *obj)
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
 {
-	obj->base.driver_private = NULL;
-
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->gtt_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
 
+	obj->ops = ops;
+
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
 	/* Avoid an unnecessary call to unbind on the first bind. */
@@ -3627,6 +3647,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj)
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
 
+static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+	.get_pages = i915_gem_object_get_pages_gtt,
+	.put_pages = i915_gem_object_put_pages_gtt,
+};
+
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size)
 {
@@ -3653,7 +3678,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	mapping_set_gfp_mask(mapping, mask);
 
-	i915_gem_object_init(obj);
+	i915_gem_object_init(obj, &i915_gem_object_ops);
 
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
@@ -3711,7 +3736,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		dev_priv->mm.interruptible = was_interruptible;
 	}
 
-	i915_gem_object_put_pages_gtt(obj);
+	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 
 	drm_gem_object_release(&obj->base);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 43c9530..e4f1141 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -41,7 +41,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_object_get_pages_gtt(obj);
+	ret = i915_gem_object_get_pages(obj);
 	if (ret) {
 		sg = ERR_PTR(ret);
 		goto out;
@@ -89,7 +89,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 		goto out_unlock;
 	}
 
-	ret = i915_gem_object_get_pages_gtt(obj);
+	ret = i915_gem_object_get_pages(obj);
 	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
 		return ERR_PTR(ret);
-- 
1.7.10.4


* [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap
  2012-09-04 20:02 Stolen memory, again Chris Wilson
  2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-06 22:55   ` Ben Widawsky
  2012-10-11 18:30   ` Jesse Barnes
  2012-09-04 20:02 ` [PATCH 03/24] drm/i915: Pin backing pages for pwrite Chris Wilson
                   ` (21 subsequent siblings)
  23 siblings, 2 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

We need to refcount our pages in order to prevent reaping them at
inopportune times, such as when they are currently vmapped or exported
to another driver. However, we also wish to keep the lazy deallocation
of our pages, so we need to take a pin/unpin approach rather than a
simple refcount.
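
The intended usage brackets any section that dereferences obj->pages.
A minimal sketch (the helpers match those added below; the caller is
illustrative):

ret = i915_gem_object_get_pages(obj);
if (ret)
	return ret;

i915_gem_object_pin_pages(obj);   /* pages_pin_count++ */
/* ... obj->pages cannot be reaped by the shrinker here ... */
i915_gem_object_unpin_pages(obj); /* pages_pin_count-- */

Note that unpinning frees nothing by itself; the pages remain allocated
until the shrinker or the final unref reaps them, preserving the lazy
deallocation.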

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |   12 ++++++++++++
 drivers/gpu/drm/i915/i915_gem.c        |   11 +++++++++--
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |    8 ++++++--
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f180874..0747472 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -994,6 +994,7 @@ struct drm_i915_gem_object {
 	unsigned int has_global_gtt_mapping:1;
 
 	struct page **pages;
+	int pages_pin_count;
 
 	/**
 	 * DMAR support
@@ -1327,6 +1328,17 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	BUG_ON(obj->pages == NULL);
+	obj->pages_pin_count++;
+}
+static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	BUG_ON(obj->pages_pin_count == 0);
+	obj->pages_pin_count--;
+}
+
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 66fbd9f..aa088ef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1699,6 +1699,9 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 
 	BUG_ON(obj->gtt_space);
 
+	if (obj->pages_pin_count)
+		return -EBUSY;
+
 	ops->put_pages(obj);
 
 	list_del(&obj->gtt_list);
@@ -1830,6 +1833,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	if (obj->sg_table || obj->pages)
 		return 0;
 
+	BUG_ON(obj->pages_pin_count);
+
 	ret = ops->get_pages(obj);
 	if (ret)
 		return ret;
@@ -3736,6 +3741,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		dev_priv->mm.interruptible = was_interruptible;
 	}
 
+	obj->pages_pin_count = 0;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 
@@ -4395,9 +4401,10 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 
 	cnt = 0;
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
-		cnt += obj->base.size >> PAGE_SHIFT;
+		if (obj->pages_pin_count == 0)
+			cnt += obj->base.size >> PAGE_SHIFT;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
-		if (obj->pin_count == 0)
+		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index e4f1141..eca4726 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -50,6 +50,8 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	/* link the pages into an SG then map the sg */
 	sg = drm_prime_pages_to_sg(obj->pages, npages);
 	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
+	i915_gem_object_pin_pages(obj);
+
 out:
 	mutex_unlock(&dev->struct_mutex);
 	return sg;
@@ -102,6 +104,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 	}
 
 	obj->vmapping_count = 1;
+	i915_gem_object_pin_pages(obj);
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return obj->dma_buf_vmapping;
@@ -117,10 +120,11 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 	if (ret)
 		return;
 
-	--obj->vmapping_count;
-	if (obj->vmapping_count == 0) {
+	if (--obj->vmapping_count == 0) {
 		vunmap(obj->dma_buf_vmapping);
 		obj->dma_buf_vmapping = NULL;
+
+		i915_gem_object_unpin_pages(obj);
 	}
 	mutex_unlock(&dev->struct_mutex);
 }
-- 
1.7.10.4


* [PATCH 03/24] drm/i915: Pin backing pages for pwrite
  2012-09-04 20:02 Stolen memory, again Chris Wilson
  2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
  2012-09-04 20:02 ` [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-07  0:07   ` Ben Widawsky
  2012-10-11 18:31   ` Jesse Barnes
  2012-09-04 20:02 ` [PATCH 04/24] drm/i915: Pin backing pages for pread Chris Wilson
                   ` (20 subsequent siblings)
  23 siblings, 2 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

By using the recently introduced pinning of pages, we can safely drop
the mutex in the knowledge that the pages are not going to disappear
beneath us, and so we can simplify the code for iterating over the pages.
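
The resulting slow path, in sketch form (condensed from the diff below,
with arguments elided):

i915_gem_object_pin_pages(obj);
...
page = obj->pages[offset >> PAGE_SHIFT];
/* Faulting in userspace memory may sleep, so drop the lock; the
 * pin guarantees the page cannot be reaped in the meantime.
 */
mutex_unlock(&dev->struct_mutex);
ret = shmem_pwrite_slow(page, ...);
mutex_lock(&dev->struct_mutex);
...
i915_gem_object_unpin_pages(obj);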

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   37 +++++++++++++------------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index aa088ef..8a4eac0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -690,7 +690,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 				       page_length);
 	kunmap_atomic(vaddr);
 
-	return ret;
+	return ret ? -EFAULT : 0;
 }
 
 /* Only difference to the fast-path function is that this can handle bit17
@@ -724,7 +724,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 					     page_do_bit17_swizzling);
 	kunmap(page);
 
-	return ret;
+	return ret ? -EFAULT : 0;
 }
 
 static int
@@ -733,7 +733,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		      struct drm_i915_gem_pwrite *args,
 		      struct drm_file *file)
 {
-	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
@@ -742,7 +741,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
 	int needs_clflush_before = 0;
-	int release_page;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -768,6 +766,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	    && obj->cache_level == I915_CACHE_NONE)
 		needs_clflush_before = 1;
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
 	offset = args->offset;
 	obj->dirty = 1;
 
@@ -793,18 +797,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
-		if (obj->pages) {
-			page = obj->pages[offset >> PAGE_SHIFT];
-			release_page = 0;
-		} else {
-			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto out;
-			}
-			release_page = 1;
-		}
-
+		page = obj->pages[offset >> PAGE_SHIFT];
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -816,26 +809,20 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			goto next_page;
 
 		hit_slowpath = 1;
-		page_cache_get(page);
 		mutex_unlock(&dev->struct_mutex);
-
 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
 					needs_clflush_after);
 
 		mutex_lock(&dev->struct_mutex);
-		page_cache_release(page);
+
 next_page:
 		set_page_dirty(page);
 		mark_page_accessed(page);
-		if (release_page)
-			page_cache_release(page);
 
-		if (ret) {
-			ret = -EFAULT;
+		if (ret)
 			goto out;
-		}
 
 		remain -= page_length;
 		user_data += page_length;
@@ -843,6 +830,8 @@ next_page:
 	}
 
 out:
+	i915_gem_object_unpin_pages(obj);
+
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4


* [PATCH 04/24] drm/i915: Pin backing pages for pread
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (2 preceding siblings ...)
  2012-09-04 20:02 ` [PATCH 03/24] drm/i915: Pin backing pages for pwrite Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-07  0:10   ` Ben Widawsky
  2012-09-04 20:02 ` [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist Chris Wilson
                   ` (19 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

By using the recently introduced pinning of pages, we can safely drop
the mutex in the knowledge that the pages are not going to disappear
beneath us, and so we can simplify the code for iterating over the pages.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8a4eac0..171bc51 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -343,7 +343,7 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
 				      page_length);
 	kunmap_atomic(vaddr);
 
-	return ret;
+	return ret ? -EFAULT : 0;
 }
 
 static void
@@ -394,7 +394,7 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 				     page_length);
 	kunmap(page);
 
-	return ret;
+	return ret ? -EFAULT : 0;
 }
 
 static int
@@ -403,7 +403,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_pread *args,
 		     struct drm_file *file)
 {
-	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	char __user *user_data;
 	ssize_t remain;
 	loff_t offset;
@@ -412,7 +411,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int prefaulted = 0;
 	int needs_clflush = 0;
-	int release_page;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -433,6 +431,12 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		}
 	}
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
 	offset = args->offset;
 
 	while (remain > 0) {
@@ -448,18 +452,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
-		if (obj->pages) {
-			page = obj->pages[offset >> PAGE_SHIFT];
-			release_page = 0;
-		} else {
-			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto out;
-			}
-			release_page = 1;
-		}
-
+		page = obj->pages[offset >> PAGE_SHIFT];
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -470,7 +463,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 			goto next_page;
 
 		hit_slowpath = 1;
-		page_cache_get(page);
 		mutex_unlock(&dev->struct_mutex);
 
 		if (!prefaulted) {
@@ -488,16 +480,12 @@ i915_gem_shmem_pread(struct drm_device *dev,
 				       needs_clflush);
 
 		mutex_lock(&dev->struct_mutex);
-		page_cache_release(page);
+
 next_page:
 		mark_page_accessed(page);
-		if (release_page)
-			page_cache_release(page);
 
-		if (ret) {
-			ret = -EFAULT;
+		if (ret)
 			goto out;
-		}
 
 		remain -= page_length;
 		user_data += page_length;
@@ -505,6 +493,8 @@ next_page:
 	}
 
 out:
+	i915_gem_object_unpin_pages(obj);
+
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4


* [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (3 preceding siblings ...)
  2012-09-04 20:02 ` [PATCH 04/24] drm/i915: Pin backing pages for pread Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-07  1:49   ` Ben Widawsky
  2012-09-04 20:02 ` [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops Chris Wilson
                   ` (18 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

Rather than have multiple data structures for describing our page layout
in conjunction with the array of pages, we can migrate all users over to
a scatterlist.

One major advantage this offers, other than unifying the page tracking
structures, is that we replace the vmalloc'ed array (which can be up to
a megabyte in size) with a chain of individual pages, which helps
reduce memory pressure.

The disadvantage is that we then do not have a simple array to iterate
over, or to access randomly. The common case for this is in the
relocation processing, which will typically fit within a single
scatterlist page and so be almost the same cost as the simple array.
For iterating over the array, the extra function call could be
optimised away, but in reality it is an insignificant cost compared to
either binding the pages or performing the pwrite/pread.
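
For reference, the common sequential walk becomes the standard
scatterlist iterator, which is the pattern the pread/pwrite loops below
adopt (sketch):

struct scatterlist *sg;
int i;

for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
	struct page *page = sg_page(sg);
	/* operate on page i of the object's backing storage */
}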

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c               |   51 +++++-------
 drivers/gpu/drm/drm_cache.c                |   24 ++++++
 drivers/gpu/drm/i915/i915_drv.h            |   18 +++--
 drivers/gpu/drm/i915/i915_gem.c            |   79 ++++++++++++------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     |   99 +++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  121 ++++++----------------------
 drivers/gpu/drm/i915/i915_gem_tiling.c     |   16 ++--
 drivers/gpu/drm/i915/i915_irq.c            |   25 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c    |    9 ++-
 include/drm/drmP.h                         |    1 +
 include/drm/intel-gtt.h                    |   10 +--
 12 files changed, 236 insertions(+), 219 deletions(-)

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 58e32f7..511d1b1 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -84,40 +84,33 @@ static struct _intel_private {
 #define IS_IRONLAKE	intel_private.driver->is_ironlake
 #define HAS_PGTBL_EN	intel_private.driver->has_pgtbl_enable
 
-int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
-			 struct scatterlist **sg_list, int *num_sg)
+static int intel_gtt_map_memory(struct page **pages,
+				unsigned int num_entries,
+				struct sg_table *st)
 {
-	struct sg_table st;
 	struct scatterlist *sg;
 	int i;
 
-	if (*sg_list)
-		return 0; /* already mapped (for e.g. resume */
-
 	DBG("try mapping %lu pages\n", (unsigned long)num_entries);
 
-	if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
+	if (sg_alloc_table(st, num_entries, GFP_KERNEL))
 		goto err;
 
-	*sg_list = sg = st.sgl;
-
-	for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
+	for_each_sg(st->sgl, sg, num_entries, i)
 		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
 
-	*num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
-				 num_entries, PCI_DMA_BIDIRECTIONAL);
-	if (unlikely(!*num_sg))
+	if (!pci_map_sg(intel_private.pcidev,
+			st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL))
 		goto err;
 
 	return 0;
 
 err:
-	sg_free_table(&st);
+	sg_free_table(st);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(intel_gtt_map_memory);
 
-void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
+static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
 {
 	struct sg_table st;
 	DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
@@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
 
 	sg_free_table(&st);
 }
-EXPORT_SYMBOL(intel_gtt_unmap_memory);
 
 static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 {
@@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
-void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
-				 unsigned int sg_len,
+void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
 {
@@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
 
 	/* sg may merge pages, but we have to separate
 	 * per-page addr for GTT */
-	for_each_sg(sg_list, sg, sg_len, i) {
+	for_each_sg(st->sgl, sg, st->nents, i) {
 		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (m = 0; m < len; m++) {
 			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			intel_private.driver->write_entry(addr,
-							  j, flags);
+			intel_private.driver->write_entry(addr, j, flags);
 			j++;
 		}
 	}
@@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
 }
 EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
 
-void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
-			    struct page **pages, unsigned int flags)
+static void intel_gtt_insert_pages(unsigned int first_entry,
+				   unsigned int num_entries,
+				   struct page **pages,
+				   unsigned int flags)
 {
 	int i, j;
 
@@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
 	}
 	readl(intel_private.gtt+j-1);
 }
-EXPORT_SYMBOL(intel_gtt_insert_pages);
 
 static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 					 off_t pg_start, int type)
@@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 		global_cache_flush();
 
 	if (intel_private.base.needs_dmar) {
-		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
-					   &mem->sg_list, &mem->num_sg);
+		struct sg_table st;
+
+		ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
 		if (ret != 0)
 			return ret;
 
-		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
-					    pg_start, type);
+		intel_gtt_insert_sg_entries(&st, pg_start, type);
+		mem->sg_list = st.sgl;
+		mem->num_sg = st.nents;
 	} else
 		intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
 				       type);
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 08758e0..628a2e0 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -100,6 +100,30 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
 EXPORT_SYMBOL(drm_clflush_pages);
 
 void
+drm_clflush_sg(struct sg_table *st)
+{
+#if defined(CONFIG_X86)
+	if (cpu_has_clflush) {
+		struct scatterlist *sg;
+		int i;
+
+		mb();
+		for_each_sg(st->sgl, sg, st->nents, i)
+			drm_clflush_page(sg_page(sg));
+		mb();
+		return;
+	}
+
+	if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
+		printk(KERN_ERR "Timed out waiting for cache flush.\n");
+#else
+	printk(KERN_ERR "Architecture has no drm_cache.c support\n");
+	WARN_ON_ONCE(1);
+#endif
+}
+EXPORT_SYMBOL(drm_clflush_sg);
+
+void
 drm_clflush_virt_range(char *addr, unsigned long length)
 {
 #if defined(CONFIG_X86)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0747472..1a714fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -992,16 +992,11 @@ struct drm_i915_gem_object {
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
 	unsigned int has_global_gtt_mapping:1;
+	unsigned int has_dma_mapping:1;
 
-	struct page **pages;
+	struct sg_table *pages;
 	int pages_pin_count;
 
-	/**
-	 * DMAR support
-	 */
-	struct scatterlist *sg_list;
-	int num_sg;
-
 	/* prime dma-buf support */
 	struct sg_table *sg_table;
 	void *dma_buf_vmapping;
@@ -1328,6 +1323,15 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+{
+	struct scatterlist *sg = obj->pages->sgl;
+	while (n >= SG_MAX_SINGLE_ALLOC) {
+		sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
+		n -= SG_MAX_SINGLE_ALLOC - 1;
+	}
+	return sg_page(sg+n);
+}
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
 	BUG_ON(obj->pages == NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 171bc51..06589a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -411,6 +411,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int prefaulted = 0;
 	int needs_clflush = 0;
+	struct scatterlist *sg;
+	int i;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -439,9 +441,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
 	offset = args->offset;
 
-	while (remain > 0) {
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
 		struct page *page;
 
+		if (i < offset >> PAGE_SHIFT)
+			continue;
+
+		if (remain <= 0)
+			break;
+
 		/* Operation in this page
 		 *
 		 * shmem_page_offset = offset within page in shmem file
@@ -452,7 +460,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
-		page = obj->pages[offset >> PAGE_SHIFT];
+		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -731,6 +739,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
 	int needs_clflush_before = 0;
+	int i;
+	struct scatterlist *sg;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -765,10 +775,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	while (remain > 0) {
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
 		struct page *page;
 		int partial_cacheline_write;
 
+		if (i < offset >> PAGE_SHIFT)
+			continue;
+
+		if (remain <= 0)
+			break;
+
 		/* Operation in this page
 		 *
 		 * shmem_page_offset = offset within page in shmem file
@@ -787,7 +803,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
-		page = obj->pages[offset >> PAGE_SHIFT];
+		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -1633,6 +1649,7 @@ static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
 	int page_count = obj->base.size / PAGE_SIZE;
+	struct scatterlist *sg;
 	int ret, i;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -1653,19 +1670,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for (i = 0; i < page_count; i++) {
+	for_each_sg(obj->pages->sgl, sg, page_count, i) {
+		struct page *page = sg_page(sg);
+
 		if (obj->dirty)
-			set_page_dirty(obj->pages[i]);
+			set_page_dirty(page);
 
 		if (obj->madv == I915_MADV_WILLNEED)
-			mark_page_accessed(obj->pages[i]);
+			mark_page_accessed(page);
 
-		page_cache_release(obj->pages[i]);
+		page_cache_release(page);
 	}
 	obj->dirty = 0;
 
-	drm_free_large(obj->pages);
-	obj->pages = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
 }
 
 static int
@@ -1682,6 +1701,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 		return -EBUSY;
 
 	ops->put_pages(obj);
+	obj->pages = NULL;
 
 	list_del(&obj->gtt_list);
 	if (i915_gem_object_is_purgeable(obj))
@@ -1739,6 +1759,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int page_count, i;
 	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
 	struct page *page;
 	gfp_t gfp;
 
@@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-	/* Get the list of pages out of our struct file.  They'll be pinned
-	 * at this point until we release them.
-	 */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
 	page_count = obj->base.size / PAGE_SIZE;
-	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
-	if (obj->pages == NULL)
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		sg_free_table(st);
+		kfree(st);
 		return -ENOMEM;
+	}
 
-	/* Fail silently without starting the shrinker */
+	/* Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
-	for (i = 0; i < page_count; i++) {
+	for_each_sg(st->sgl, sg, page_count, i) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		if (IS_ERR(page)) {
 			i915_gem_purge(dev_priv, page_count);
@@ -1785,20 +1814,20 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
 
-		obj->pages[i] = page;
+		sg_set_page(sg, page, PAGE_SIZE, 0);
 	}
 
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj);
 
+	obj->pages = st;
 	return 0;
 
 err_pages:
-	while (i--)
-		page_cache_release(obj->pages[i]);
-
-	drm_free_large(obj->pages);
-	obj->pages = NULL;
+	for_each_sg(st->sgl, sg, i, page_count)
+		page_cache_release(sg_page(sg));
+	sg_free_table(st);
+	kfree(st);
 	return PTR_ERR(page);
 }
 
@@ -2974,7 +3003,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 
 	trace_i915_gem_object_clflush(obj);
 
-	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
+	drm_clflush_sg(obj->pages);
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
@@ -3724,6 +3753,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 
+	BUG_ON(obj->pages);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index eca4726..4bb1b94 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -28,33 +28,57 @@
 #include <linux/dma-buf.h>
 
 static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
-				      enum dma_data_direction dir)
+					     enum dma_data_direction dir)
 {
 	struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
-	struct drm_device *dev = obj->base.dev;
-	int npages = obj->base.size / PAGE_SIZE;
-	struct sg_table *sg;
-	int ret;
-	int nents;
+	struct sg_table *st;
+	struct scatterlist *src, *dst;
+	int ret, i;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
 	if (ret)
 		return ERR_PTR(ret);
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret) {
-		sg = ERR_PTR(ret);
+		st = ERR_PTR(ret);
+		goto out;
+	}
+
+	/* Copy sg so that we make an independent mapping */
+	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (st == NULL) {
+		st = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
+	if (ret) {
+		kfree(st);
+		st = ERR_PTR(ret);
+		goto out;
+	}
+
+	src = obj->pages->sgl;
+	dst = st->sgl;
+	for (i = 0; i < obj->pages->nents; i++) {
+		sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
+		dst = sg_next(dst);
+		src = sg_next(src);
+	}
+
+	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
+		sg_free_table(st);
+		kfree(st);
+		st = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 
-	/* link the pages into an SG then map the sg */
-	sg = drm_prime_pages_to_sg(obj->pages, npages);
-	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
 	i915_gem_object_pin_pages(obj);
 
 out:
-	mutex_unlock(&dev->struct_mutex);
-	return sg;
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return st;
 }
 
 static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
@@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
 	struct drm_i915_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->base.dev;
-	int ret;
+	struct scatterlist *sg;
+	struct page **pages;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 	}
 
 	ret = i915_gem_object_get_pages(obj);
-	if (ret) {
-		mutex_unlock(&dev->struct_mutex);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto error;
 
-	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
-	if (!obj->dma_buf_vmapping) {
-		DRM_ERROR("failed to vmap object\n");
-		goto out_unlock;
-	}
+	ret = -ENOMEM;
+
+	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
+	if (pages == NULL)
+		goto error;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
+		pages[i] = sg_page(sg);
+
+	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
+	drm_free_large(pages);
+
+	if (!obj->dma_buf_vmapping)
+		goto error;
 
 	obj->vmapping_count = 1;
 	i915_gem_object_pin_pages(obj);
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return obj->dma_buf_vmapping;
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+	return ERR_PTR(ret);
 }
 
 static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
@@ -184,22 +221,19 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 };
 
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
-				struct drm_gem_object *gem_obj, int flags)
+				      struct drm_gem_object *gem_obj, int flags)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 
-	return dma_buf_export(obj, &i915_dmabuf_ops,
-						  obj->base.size, 0600);
+	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
 }
 
 struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
-				struct dma_buf *dma_buf)
+					     struct dma_buf *dma_buf)
 {
 	struct dma_buf_attachment *attach;
 	struct sg_table *sg;
 	struct drm_i915_gem_object *obj;
-	int npages;
-	int size;
 	int ret;
 
 	/* is this one of own objects? */
@@ -223,21 +257,19 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 		goto fail_detach;
 	}
 
-	size = dma_buf->size;
-	npages = size / PAGE_SIZE;
-
 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 	if (obj == NULL) {
 		ret = -ENOMEM;
 		goto fail_unmap;
 	}
 
-	ret = drm_gem_private_object_init(dev, &obj->base, size);
+	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	if (ret) {
 		kfree(obj);
 		goto fail_unmap;
 	}
 
+	obj->has_dma_mapping = true;
 	obj->sg_table = sg;
 	obj->base.import_attach = attach;
 
@@ -249,3 +281,4 @@ fail_detach:
 	dma_buf_detach(dma_buf, attach);
 	return ERR_PTR(ret);
 }
+
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e6b2205..4ab0083 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -210,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		if (ret)
 			return ret;
 
-		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
+		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+							     reloc->offset >> PAGE_SHIFT));
 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
 		kunmap_atomic(vaddr);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1847731..6746109 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -167,8 +167,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 }
 
 static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
-					 struct scatterlist *sg_list,
-					 unsigned sg_len,
+					 const struct sg_table *pages,
 					 unsigned first_entry,
 					 uint32_t pte_flags)
 {
@@ -180,12 +179,12 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
 	struct scatterlist *sg;
 
 	/* init sg walking */
-	sg = sg_list;
+	sg = pages->sgl;
 	i = 0;
 	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
 	m = 0;
 
-	while (i < sg_len) {
+	while (i < pages->nents) {
 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
 
 		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
@@ -194,13 +193,11 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
 			pt_vaddr[j] = pte | pte_flags;
 
 			/* grab the next page */
-			m++;
-			if (m == segment_len) {
-				sg = sg_next(sg);
-				i++;
-				if (i == sg_len)
+			if (++m == segment_len) {
+				if (++i == pages->nents)
 					break;
 
+				sg = sg_next(sg);
 				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
 				m = 0;
 			}
@@ -213,44 +210,10 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
-				    unsigned first_entry, unsigned num_entries,
-				    struct page **pages, uint32_t pte_flags)
-{
-	uint32_t *pt_vaddr, pte;
-	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned last_pte, i;
-	dma_addr_t page_addr;
-
-	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
-
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (i = first_pte; i < last_pte; i++) {
-			page_addr = page_to_phys(*pages);
-			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
-			pt_vaddr[i] = pte | pte_flags;
-
-			pages++;
-		}
-
-		kunmap_atomic(pt_vaddr);
-
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pd++;
-	}
-}
-
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t pte_flags = GEN6_PTE_VALID;
 
 	switch (cache_level) {
@@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 		BUG();
 	}
 
-	if (obj->sg_table) {
-		i915_ppgtt_insert_sg_entries(ppgtt,
-					     obj->sg_table->sgl,
-					     obj->sg_table->nents,
-					     obj->gtt_space->start >> PAGE_SHIFT,
-					     pte_flags);
-	} else if (dev_priv->mm.gtt->needs_dmar) {
-		BUG_ON(!obj->sg_list);
-
-		i915_ppgtt_insert_sg_entries(ppgtt,
-					     obj->sg_list,
-					     obj->num_sg,
-					     obj->gtt_space->start >> PAGE_SHIFT,
-					     pte_flags);
-	} else
-		i915_ppgtt_insert_pages(ppgtt,
-					obj->gtt_space->start >> PAGE_SHIFT,
-					obj->base.size >> PAGE_SHIFT,
-					obj->pages,
-					pte_flags);
+	i915_ppgtt_insert_sg_entries(ppgtt,
+				     obj->sg_table ?: obj->pages,
+				     obj->gtt_space->start >> PAGE_SHIFT,
+				     pte_flags);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
@@ -361,44 +308,26 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/* don't map imported dma buf objects */
-	if (dev_priv->mm.gtt->needs_dmar && !obj->sg_table)
-		return intel_gtt_map_memory(obj->pages,
-					    obj->base.size >> PAGE_SHIFT,
-					    &obj->sg_list,
-					    &obj->num_sg);
-	else
+	if (obj->has_dma_mapping)
 		return 0;
+
+	if (!dma_map_sg(&obj->base.dev->pdev->dev,
+			obj->pages->sgl, obj->pages->nents,
+			PCI_DMA_BIDIRECTIONAL))
+		return -ENOSPC;
+
+	return 0;
 }
 
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
 
-	if (obj->sg_table) {
-		intel_gtt_insert_sg_entries(obj->sg_table->sgl,
-					    obj->sg_table->nents,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    agp_type);
-	} else if (dev_priv->mm.gtt->needs_dmar) {
-		BUG_ON(!obj->sg_list);
-
-		intel_gtt_insert_sg_entries(obj->sg_list,
-					    obj->num_sg,
-					    obj->gtt_space->start >> PAGE_SHIFT,
-					    agp_type);
-	} else
-		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
-				       obj->base.size >> PAGE_SHIFT,
-				       obj->pages,
-				       agp_type);
-
+	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
+				    obj->gtt_space->start >> PAGE_SHIFT,
+				    agp_type);
 	obj->has_global_gtt_mapping = 1;
 }
 
@@ -418,10 +347,10 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 
 	interruptible = do_idling(dev_priv);
 
-	if (obj->sg_list) {
-		intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
-		obj->sg_list = NULL;
-	}
+	if (!obj->has_dma_mapping)
+		dma_unmap_sg(&dev->pdev->dev,
+			     obj->pages->sgl, obj->pages->nents,
+			     PCI_DMA_BIDIRECTIONAL);
 
 	undo_idling(dev_priv, interruptible);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index b964df5..8093ecd 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -470,18 +470,20 @@ i915_gem_swizzle_page(struct page *page)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
+	struct scatterlist *sg;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
 	if (obj->bit_17 == NULL)
 		return;
 
-	for (i = 0; i < page_count; i++) {
-		char new_bit_17 = page_to_phys(obj->pages[i]) >> 17;
+	for_each_sg(obj->pages->sgl, sg, page_count, i) {
+		struct page *page = sg_page(sg);
+		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
 		    (test_bit(i, obj->bit_17) != 0)) {
-			i915_gem_swizzle_page(obj->pages[i]);
-			set_page_dirty(obj->pages[i]);
+			i915_gem_swizzle_page(page);
+			set_page_dirty(page);
 		}
 	}
 }
@@ -489,6 +491,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
+	struct scatterlist *sg;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
@@ -502,8 +505,9 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 		}
 	}
 
-	for (i = 0; i < page_count; i++) {
-		if (page_to_phys(obj->pages[i]) & (1 << 17))
+	for_each_sg(obj->pages->sgl, sg, page_count, i) {
+		struct page *page = sg_page(sg);
+		if (page_to_phys(page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
 			__clear_bit(i, obj->bit_17);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d601013..dd49046 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -888,20 +888,20 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 			 struct drm_i915_gem_object *src)
 {
 	struct drm_i915_error_object *dst;
-	int page, page_count;
+	int i, count;
 	u32 reloc_offset;
 
 	if (src == NULL || src->pages == NULL)
 		return NULL;
 
-	page_count = src->base.size / PAGE_SIZE;
+	count = src->base.size / PAGE_SIZE;
 
-	dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
+	dst = kmalloc(sizeof(*dst) + count * sizeof(u32 *), GFP_ATOMIC);
 	if (dst == NULL)
 		return NULL;
 
 	reloc_offset = src->gtt_offset;
-	for (page = 0; page < page_count; page++) {
+	for (i = 0; i < count; i++) {
 		unsigned long flags;
 		void *d;
 
@@ -924,30 +924,33 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 			memcpy_fromio(d, s, PAGE_SIZE);
 			io_mapping_unmap_atomic(s);
 		} else {
+			struct page *page;
 			void *s;
 
-			drm_clflush_pages(&src->pages[page], 1);
+			page = i915_gem_object_get_page(src, i);
+
+			drm_clflush_pages(&page, 1);
 
-			s = kmap_atomic(src->pages[page]);
+			s = kmap_atomic(page);
 			memcpy(d, s, PAGE_SIZE);
 			kunmap_atomic(s);
 
-			drm_clflush_pages(&src->pages[page], 1);
+			drm_clflush_pages(&page, 1);
 		}
 		local_irq_restore(flags);
 
-		dst->pages[page] = d;
+		dst->pages[i] = d;
 
 		reloc_offset += PAGE_SIZE;
 	}
-	dst->page_count = page_count;
+	dst->page_count = count;
 	dst->gtt_offset = src->gtt_offset;
 
 	return dst;
 
 unwind:
-	while (page--)
-		kfree(dst->pages[page]);
+	while (i--)
+		kfree(dst->pages[i]);
 	kfree(dst);
 	return NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 55cdb4d..984a0c5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -464,7 +464,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		goto err_unref;
 
 	pc->gtt_offset = obj->gtt_offset;
-	pc->cpu_page =  kmap(obj->pages[0]);
+	pc->cpu_page =  kmap(sg_page(obj->pages->sgl));
 	if (pc->cpu_page == NULL)
 		goto err_unpin;
 
@@ -491,7 +491,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
 		return;
 
 	obj = pc->obj;
-	kunmap(obj->pages[0]);
+
+	kunmap(sg_page(obj->pages->sgl));
 	i915_gem_object_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -1026,7 +1027,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
 	if (obj == NULL)
 		return;
 
-	kunmap(obj->pages[0]);
+	kunmap(sg_page(obj->pages->sgl));
 	i915_gem_object_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 	ring->status_page.obj = NULL;
@@ -1053,7 +1054,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 	}
 
 	ring->status_page.gfx_addr = obj->gtt_offset;
-	ring->status_page.page_addr = kmap(obj->pages[0]);
+	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
 	if (ring->status_page.page_addr == NULL) {
 		ret = -ENOMEM;
 		goto err_unpin;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index d6b67bb..d5f0c16 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1367,6 +1367,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
 
 /* Cache management (drm_cache.c) */
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
+void drm_clflush_sg(struct sg_table *st);
 void drm_clflush_virt_range(char *addr, unsigned long length);
 
 				/* Locking IOCTL support (drm_lock.h) */
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 8e29d55..2e37e9f 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -30,16 +30,10 @@ void intel_gmch_remove(void);
 bool intel_enable_gtt(void);
 
 void intel_gtt_chipset_flush(void);
-void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg);
-void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
-int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
-			 struct scatterlist **sg_list, int *num_sg);
-void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
-				 unsigned int sg_len,
+void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags);
-void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
-			    struct page **pages, unsigned int flags);
+void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
 
 /* Special gtt memory types */
 #define AGP_DCACHE_MEMORY	1
-- 
1.7.10.4


* [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (4 preceding siblings ...)
  2012-09-04 20:02 ` [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-14 18:02   ` Ben Widawsky
  2012-09-14 21:43   ` Daniel Vetter
  2012-09-04 20:02 ` [PATCH 07/24] drm: Introduce drm_mm_create_block() Chris Wilson
                   ` (17 subsequent siblings)
  23 siblings, 2 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC
  To: intel-gfx

By providing a callback for when we need to bind the pages, and then
release them again later, we can shorten the amount of time we hold the
foreign pages mapped and pinned; importantly, the dmabuf objects then
behave like any other normal object with respect to the shrinker and
memory management.
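
With both backends behind the same vtable, the core no longer needs to
special-case imported objects. A sketch of the resulting dispatch (the
ops tables come from this patch and patch 1; the caller is
illustrative):

/* shmemfs-backed object */
i915_gem_object_init(obj, &i915_gem_object_ops);

/* dma-buf import: same init, different backing-storage hooks */
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);

/* either way, callers take and release pages uniformly */
ret = i915_gem_object_get_pages(obj); /* -> obj->ops->get_pages(obj) */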

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |    1 -
 drivers/gpu/drm/i915/i915_gem.c        |   10 ++++----
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |   44 ++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.c    |    4 +--
 4 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1a714fa..a86f50d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -998,7 +998,6 @@ struct drm_i915_gem_object {
 	int pages_pin_count;
 
 	/* prime dma-buf support */
-	struct sg_table *sg_table;
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 06589a9..58075e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1692,7 +1692,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
 
-	if (obj->sg_table || obj->pages == NULL)
+	if (obj->pages == NULL)
 		return 0;
 
 	BUG_ON(obj->gtt_space);
@@ -1838,7 +1838,7 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
 	int ret;
 
-	if (obj->sg_table || obj->pages)
+	if (obj->pages)
 		return 0;
 
 	BUG_ON(obj->pages_pin_count);
@@ -3731,9 +3731,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	trace_i915_gem_object_destroy(obj);
 
-	if (gem_obj->import_attach)
-		drm_prime_gem_destroy(gem_obj, obj->sg_table);
-
 	if (obj->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
@@ -3755,6 +3752,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	BUG_ON(obj->pages);
 
+	if (obj->base.import_attach)
+		drm_prime_gem_destroy(&obj->base, NULL);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 4bb1b94..ca3497e 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -82,7 +82,8 @@ out:
 }
 
 static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
-			    struct sg_table *sg, enum dma_data_direction dir)
+				   struct sg_table *sg,
+				   enum dma_data_direction dir)
 {
 	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
 	sg_free_table(sg);
@@ -228,11 +229,35 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
 }
 
+static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *sg;
+
+	sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+
+	obj->pages = sg;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
+{
+	dma_buf_unmap_attachment(obj->base.import_attach,
+				 obj->pages, DMA_BIDIRECTIONAL);
+	obj->has_dma_mapping = false;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
+	.get_pages = i915_gem_object_get_pages_dmabuf,
+	.put_pages = i915_gem_object_put_pages_dmabuf,
+};
+
 struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 					     struct dma_buf *dma_buf)
 {
 	struct dma_buf_attachment *attach;
-	struct sg_table *sg;
 	struct drm_i915_gem_object *obj;
 	int ret;
 
@@ -251,34 +276,25 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	if (IS_ERR(attach))
 		return ERR_CAST(attach);
 
-	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
-	if (IS_ERR(sg)) {
-		ret = PTR_ERR(sg);
-		goto fail_detach;
-	}
 
 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 	if (obj == NULL) {
 		ret = -ENOMEM;
-		goto fail_unmap;
+		goto fail_detach;
 	}
 
 	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	if (ret) {
 		kfree(obj);
-		goto fail_unmap;
+		goto fail_detach;
 	}
 
-	obj->has_dma_mapping = true;
-	obj->sg_table = sg;
+	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
 
 	return &obj->base;
 
-fail_unmap:
-	dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
 fail_detach:
 	dma_buf_detach(dma_buf, attach);
 	return ERR_PTR(ret);
 }
-
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6746109..c86dc59 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -234,7 +234,7 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 	}
 
 	i915_ppgtt_insert_sg_entries(ppgtt,
-				     obj->sg_table ?: obj->pages,
+				     obj->pages,
 				     obj->gtt_space->start >> PAGE_SHIFT,
 				     pte_flags);
 }
@@ -325,7 +325,7 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
 
-	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
+	intel_gtt_insert_sg_entries(obj->pages,
 				    obj->gtt_space->start >> PAGE_SHIFT,
 				    agp_type);
 	obj->has_global_gtt_mapping = 1;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 07/24] drm: Introduce drm_mm_create_block()
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (5 preceding siblings ...)
  2012-09-04 20:02 ` [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops Chris Wilson
@ 2012-09-04 20:02 ` Chris Wilson
  2012-09-12 13:43   ` Daniel Vetter
  2012-09-04 20:03 ` [PATCH 08/24] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
                   ` (16 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: Dave Airlie

To be used later by i915 to preallocate exact blocks of space from the
range manager.
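
For illustration, a sketch of the intended use, essentially what a
later patch in this series does to mark BIOS-preallocated GTT ranges as
occupied (the dev_priv/obj context is assumed):

	struct drm_mm_node *node;

	/* Claim the exact [start, start + size) range; returns NULL if
	 * the range overlaps an existing allocation or no hole covers it. */
	node = drm_mm_create_block(&dev_priv->mm.gtt_space,
				   obj->gtt_offset,
				   obj->base.size,
				   false); /* false: may sleep to allocate */
	if (node == NULL)
		return -ENOSPC;
	obj->gtt_space = node;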

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++
 include/drm/drm_mm.h     |    4 ++++
 2 files changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 9bb82f7..5db8c20 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	}
 }
 
+struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
+					unsigned long start,
+					unsigned long size,
+					bool atomic)
+{
+	struct drm_mm_node *hole, *node;
+	unsigned long end = start + size;
+
+	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
+		unsigned long hole_start;
+		unsigned long hole_end;
+
+		BUG_ON(!hole->hole_follows);
+		hole_start = drm_mm_hole_node_start(hole);
+		hole_end = drm_mm_hole_node_end(hole);
+
+		if (hole_start > start || hole_end < end)
+			continue;
+
+		node = drm_mm_kmalloc(mm, atomic);
+		if (unlikely(node == NULL))
+			return NULL;
+
+		node->start = start;
+		node->size = size;
+		node->mm = mm;
+		node->allocated = 1;
+
+		INIT_LIST_HEAD(&node->hole_stack);
+		list_add(&node->node_list, &hole->node_list);
+
+		if (start == hole_start) {
+			hole->hole_follows = 0;
+			list_del_init(&hole->hole_stack);
+		}
+
+		node->hole_follows = 0;
+		if (end != hole_end) {
+			list_add(&node->hole_stack, &mm->hole_stack);
+			node->hole_follows = 1;
+		}
+
+		return node;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_mm_create_block);
+
 struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *hole_node,
 					     unsigned long size,
 					     unsigned alignment,
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 06d7f79..4020f96 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -102,6 +102,10 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 /*
  * Basic range manager support (drm_mm.c)
  */
+extern struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
+					       unsigned long start,
+					       unsigned long size,
+					       bool atomic);
 extern struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
 						    unsigned long size,
 						    unsigned alignment,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 08/24] drm/i915: Fix detection of stolen base for gen2
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (6 preceding siblings ...)
  2012-09-04 20:02 ` [PATCH 07/24] drm: Introduce drm_mm_create_block() Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-04 20:03 ` [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

It was not until the G33 refresh that a PCI config register was
introduced that explicitly said where the stolen memory was. Prior to
865G there was not even a register that said where the end of usable
low memory was and where the stolen memory began (or ended, depending
upon the chipset). Before then, one had to look at the BIOS memory maps
to find the Top of Memory. Alas, that is not exported by arch/x86, and
so we have to resort to disabling stolen memory on gen2 for the time
being.
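
As a worked example of the (currently #if 0'd) gen3 decode in the diff
below (the value is illustrative; the 128MiB granularity is implied by
the shifts in the patch):

	u8 val = 0x10;	/* TOLUD config byte at 0x9c, top in bits 7:3 */
	unsigned long top = (unsigned long)(val >> 3) << 27;	/* 2 * 128MiB */
	unsigned long base = top - dev_priv->mm.gtt->stolen_size;
	/* stolen memory sits immediately below TOLUD, here below 256MiB */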

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |   69 ++++++++++++++------------------
 2 files changed, 31 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a86f50d..f614c26 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -697,6 +697,7 @@ typedef struct drm_i915_private {
 		unsigned long gtt_start;
 		unsigned long gtt_mappable_end;
 		unsigned long gtt_end;
+		unsigned long stolen_base; /* limited to low memory (32-bit) */
 
 		struct io_mapping *gtt_mapping;
 		phys_addr_t gtt_base_addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index ada2e90..a01ff74 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -43,56 +43,43 @@
  * for is a boon.
  */
 
-#define PTE_ADDRESS_MASK		0xfffff000
-#define PTE_ADDRESS_MASK_HIGH		0x000000f0 /* i915+ */
-#define PTE_MAPPING_TYPE_UNCACHED	(0 << 1)
-#define PTE_MAPPING_TYPE_DCACHE		(1 << 1) /* i830 only */
-#define PTE_MAPPING_TYPE_CACHED		(3 << 1)
-#define PTE_MAPPING_TYPE_MASK		(3 << 1)
-#define PTE_VALID			(1 << 0)
-
-/**
- * i915_stolen_to_phys - take an offset into stolen memory and turn it into
- *                       a physical one
- * @dev: drm device
- * @offset: address to translate
- *
- * Some chip functions require allocations from stolen space and need the
- * physical address of the memory in question.
- */
-static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
+static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pdev = dev_priv->bridge_dev;
 	u32 base;
 
-#if 0
 	/* On the machines I have tested the Graphics Base of Stolen Memory
-	 * is unreliable, so compute the base by subtracting the stolen memory
-	 * from the Top of Low Usable DRAM which is where the BIOS places
-	 * the graphics stolen memory.
+	 * is unreliable, so on those compute the base by subtracting the
+	 * stolen memory from the Top of Low Usable DRAM which is where the
+	 * BIOS places the graphics stolen memory.
+	 *
+	 * On gen2, the layout is slightly different with the Graphics Segment
+	 * immediately following Top of Memory (or Top of Usable DRAM). Note
+	 * it appears that TOUD is only reported by 865g, so we just use the
+	 * top of memory as determined by the e820 probe.
+	 *
+	 * XXX gen2 requires an unavailable symbol and 945gm fails with
+	 * its value of TOLUD.
 	 */
+	base = 0;
 	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		/* top 32bits are reserved = 0 */
+		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(pdev, 0xA4, &base);
-	} else {
-		/* XXX presume 8xx is the same as i915 */
-		pci_bus_read_config_dword(pdev->bus, 2, 0x5C, &base);
-	}
-#else
-	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		u16 val;
-		pci_read_config_word(pdev, 0xb0, &val);
-		base = val >> 4 << 20;
-	} else {
+#if 0
+	} else if (IS_GEN3(dev)) {
 		u8 val;
+		/* Stolen is immediately below Top of Low Usable DRAM */
 		pci_read_config_byte(pdev, 0x9c, &val);
 		base = val >> 3 << 27;
-	}
-	base -= dev_priv->mm.gtt->stolen_size;
+		base -= dev_priv->mm.gtt->stolen_size;
+	} else {
+		/* Stolen is immediately above Top of Memory */
+		base = max_low_pfn_mapped << PAGE_SHIFT;
 #endif
+	}
 
-	return base + offset;
+	return base;
 }
 
 static void i915_warn_stolen(struct drm_device *dev)
@@ -117,7 +104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	if (!compressed_fb)
 		goto err;
 
-	cfb_base = i915_stolen_to_phys(dev, compressed_fb->start);
+	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
 	if (!cfb_base)
 		goto err_fb;
 
@@ -130,7 +117,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 		if (!compressed_llb)
 			goto err_fb;
 
-		ll_base = i915_stolen_to_phys(dev, compressed_llb->start);
+		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
 		if (!ll_base)
 			goto err_llb;
 	}
@@ -149,7 +136,7 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	}
 
 	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
-		      cfb_base, ll_base, size >> 20);
+		      (long)cfb_base, (long)ll_base, size >> 20);
 	return;
 
 err_llb:
@@ -181,6 +168,10 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
 
+	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
+	if (dev_priv->mm.stolen_base == 0)
+		return 0;
+
 	/* Basic memrange allocator for stolen space */
 	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (7 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 08/24] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:43   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
                   ` (14 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

A few of the earlier registers were enlarged, and so the Base Data of
Stolen Memory Register (BDSM) was pushed out to 0xb0.
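
For example, with an illustrative BDSM value of 0xce000001 (lock bit
set), the mask strips the low bits to recover the physical base:

	u32 bdsm = 0xce000001;
	u32 base = bdsm & ~4095;	/* == 0xce000000 */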

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_stolen.c |    9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index a01ff74..a528e4a 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -63,7 +63,11 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 	 * its value of TOLUD.
 	 */
 	base = 0;
-	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
+	if (INTEL_INFO(dev)->gen >= 6) {
+		/* Read Base Data of Stolen Memory Register (BDSM) directly */
+		pci_read_config_dword(pdev, 0xB0, &base);
+		base &= ~4095; /* lower bits used for locking register */
+	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
 		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(pdev, 0xA4, &base);
 #if 0
@@ -172,6 +176,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	if (dev_priv->mm.stolen_base == 0)
 		return 0;
 
+	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
+		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
+
 	/* Basic memrange allocator for stolen space */
 	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (8 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:45   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h     |    2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c |   35 ++++++++++++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f614c26..533361e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -899,6 +899,8 @@ enum i915_cache_level {
 	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
 };
 
+#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
+
 struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c86dc59..d1b4cc8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -378,18 +378,47 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 			      unsigned long end)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_mm_node *entry;
+	struct drm_i915_gem_object *obj;
 
-	/* Substract the guard page ... */
+	/* Subtract the guard page ... */
 	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
 	if (!HAS_LLC(dev))
 		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
 
+	/* Mark any preallocated objects as occupied */
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
+		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
+			      obj->gtt_offset, obj->base.size);
+
+		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
+		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
+						     obj->gtt_offset,
+						     obj->base.size,
+						     false);
+		obj->has_global_gtt_mapping = 1;
+	}
+
 	dev_priv->mm.gtt_start = start;
 	dev_priv->mm.gtt_mappable_end = mappable_end;
 	dev_priv->mm.gtt_end = end;
 	dev_priv->mm.gtt_total = end - start;
 	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
 
-	/* ... but ensure that we clear the entire range. */
-	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+	/* Clear any non-preallocated blocks */
+	list_for_each_entry(entry, &dev_priv->mm.gtt_space.hole_stack, hole_stack) {
+		unsigned long hole_start = entry->start + entry->size;
+		unsigned long hole_end = list_entry(entry->node_list.next,
+						    struct drm_mm_node,
+						    node_list)->start;
+
+		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+			      hole_start, hole_end);
+
+		intel_gtt_clear_range(hole_start / PAGE_SIZE,
+				      (hole_end-hole_start) / PAGE_SIZE);
+	}
+
+	/* And finally clear the reserved guard page */
+	intel_gtt_clear_range(end / PAGE_SIZE - 1, 1);
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (9 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-12 13:54   ` Daniel Vetter
  2012-09-04 20:03 ` [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
                   ` (12 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Dave Airlie

This will be used by i915 in forthcoming patches in order to measure
the largest contiguous chunk of memory available for enabling chipset
features.
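
A sketch of that measurement, as taken up by the following patch to
size the FBC buffer from the largest hole in the stolen allocator:

	struct drm_mm_node *node;
	unsigned long hole_start, hole_end, largest = 0;

	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
		unsigned long hole_size = hole_end - hole_start;
		if (hole_size > largest)
			largest = hole_size;
	}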

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c |   55 +++++++++++++++-------------------------------
 include/drm/drm_mm.h     |   26 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 5db8c20..c3d11ec 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -102,20 +102,6 @@ int drm_mm_pre_get(struct drm_mm *mm)
 }
 EXPORT_SYMBOL(drm_mm_pre_get);
 
-static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
-{
-	return hole_node->start + hole_node->size;
-}
-
-static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
-{
-	struct drm_mm_node *next_node =
-		list_entry(hole_node->node_list.next, struct drm_mm_node,
-			   node_list);
-
-	return next_node->start;
-}
-
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
 				 unsigned long size, unsigned alignment,
@@ -127,7 +113,7 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	unsigned long adj_start = hole_start;
 	unsigned long adj_end = hole_end;
 
-	BUG_ON(!hole_node->hole_follows || node->allocated);
+	BUG_ON(node->allocated);
 
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
@@ -168,15 +154,10 @@ struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
 {
 	struct drm_mm_node *hole, *node;
 	unsigned long end = start + size;
+	unsigned long hole_start;
+	unsigned long hole_end;
 
-	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
-		unsigned long hole_start;
-		unsigned long hole_end;
-
-		BUG_ON(!hole->hole_follows);
-		hole_start = drm_mm_hole_node_start(hole);
-		hole_end = drm_mm_hole_node_end(hole);
-
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
 		if (hole_start > start || hole_end < end)
 			continue;
 
@@ -361,8 +342,10 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 				== drm_mm_hole_node_end(node));
 		list_del(&node->hole_stack);
 	} else
-		BUG_ON(drm_mm_hole_node_start(node)
-				!= drm_mm_hole_node_end(node));
+		BUG_ON(node->start + node->size !=
+		       list_entry(node->node_list.next,
+				  struct drm_mm_node, node_list)->start);
+
 
 	if (!prev_node->hole_follows) {
 		prev_node->hole_follows = 1;
@@ -420,6 +403,8 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
+	unsigned long adj_start;
+	unsigned long adj_end;
 	unsigned long best_size;
 
 	BUG_ON(mm->scanned_blocks);
@@ -427,17 +412,13 @@ struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
-		unsigned long adj_start = drm_mm_hole_node_start(entry);
-		unsigned long adj_end = drm_mm_hole_node_end(entry);
-
+	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
 			if (adj_end <= adj_start)
 				continue;
 		}
 
-		BUG_ON(!entry->hole_follows);
 		if (!check_free_hole(adj_start, adj_end, size, alignment))
 			continue;
 
@@ -464,6 +445,8 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
+	unsigned long adj_start;
+	unsigned long adj_end;
 	unsigned long best_size;
 
 	BUG_ON(mm->scanned_blocks);
@@ -471,13 +454,11 @@ struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	list_for_each_entry(entry, &mm->hole_stack, hole_stack) {
-		unsigned long adj_start = drm_mm_hole_node_start(entry) < start ?
-			start : drm_mm_hole_node_start(entry);
-		unsigned long adj_end = drm_mm_hole_node_end(entry) > end ?
-			end : drm_mm_hole_node_end(entry);
-
-		BUG_ON(!entry->hole_follows);
+	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+		if (adj_start < start)
+			adj_start = start;
+		if (adj_end > end)
+			adj_end = end;
 
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 4020f96..d710a10 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -89,6 +89,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 {
 	return mm->hole_stack.next;
 }
+
+static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node)
+{
+	BUG_ON(!hole_node->hole_follows);
+	return hole_node->start + hole_node->size;
+}
+
+static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
+{
+	return list_entry(hole_node->node_list.next,
+			  struct drm_mm_node, node_list)->start;
+}
+
 #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \
 						&(mm)->head_node.node_list, \
 						node_list)
@@ -99,6 +112,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
 	     entry != NULL; entry = next, \
 		next = entry ? list_entry(entry->node_list.next, \
 			struct drm_mm_node, node_list) : NULL) \
+
+/* Note that we need to unroll list_for_each_entry in order to inline
+ * setting hole_start and hole_end on each iteration and keep the
+ * macro sane.
+ */
+#define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
+	for (entry = list_entry((mm)->hole_stack.next, typeof(struct drm_mm_node), hole_stack); \
+	     &entry->hole_stack != &(mm)->hole_stack ? \
+	     hole_start = drm_mm_hole_node_start(entry), \
+	     hole_end = drm_mm_hole_node_end(entry) : \
+	     0; \
+	     entry = list_entry(entry->hole_stack.next, typeof(struct drm_mm_node), hole_stack))
+
 /*
  * Basic range manager support (drm_mm.c)
  */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (10 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:49   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 13/24] drm/i915: Defer allocation of stolen memory for FBC until first use Chris Wilson
                   ` (11 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

As we may wish to wrap regions preallocated by the BIOS, we need to do
that before carving out contiguous chunks of stolen space for FBC.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |  110 ++++++++++++++++----------------
 drivers/gpu/drm/i915/intel_display.c   |    3 +
 3 files changed, 59 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 533361e..31d3a9f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1491,6 +1491,7 @@ int i915_gem_evict_everything(struct drm_device *dev);
 
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
+int i915_gem_stolen_setup_compression(struct drm_device *dev);
 void i915_gem_cleanup_stolen(struct drm_device *dev);
 
 /* i915_gem_tiling.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index a528e4a..17119d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -86,21 +86,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 	return base;
 }
 
-static void i915_warn_stolen(struct drm_device *dev)
-{
-	DRM_INFO("not enough stolen space for compressed buffer, disabling\n");
-	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
-}
-
-static void i915_setup_compression(struct drm_device *dev, int size)
+static int i915_setup_compression(struct drm_device *dev, int size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
-	unsigned long cfb_base;
-	unsigned long ll_base = 0;
 
-	/* Just in case the BIOS is doing something questionable. */
-	intel_disable_fbc(dev);
+	DRM_DEBUG_KMS("reserving %d bytes of contiguous stolen space for FBC\n",
+		      size);
 
 	compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
 	if (compressed_fb)
@@ -108,11 +100,11 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 	if (!compressed_fb)
 		goto err;
 
-	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
-	if (!cfb_base)
-		goto err_fb;
-
-	if (!(IS_GM45(dev) || HAS_PCH_SPLIT(dev))) {
+	if (HAS_PCH_SPLIT(dev))
+		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
+	else if (IS_GM45(dev)) {
+		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+	} else {
 		compressed_llb = drm_mm_search_free(&dev_priv->mm.stolen,
 						    4096, 4096, 0);
 		if (compressed_llb)
@@ -121,56 +113,78 @@ static void i915_setup_compression(struct drm_device *dev, int size)
 		if (!compressed_llb)
 			goto err_fb;
 
-		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
-		if (!ll_base)
-			goto err_llb;
-	}
+		dev_priv->compressed_llb = compressed_llb;
 
-	dev_priv->cfb_size = size;
+		I915_WRITE(FBC_CFB_BASE,
+			   dev_priv->mm.stolen_base + compressed_fb->start);
+		I915_WRITE(FBC_LL_BASE,
+			   dev_priv->mm.stolen_base + compressed_llb->start);
+	}
 
 	dev_priv->compressed_fb = compressed_fb;
-	if (HAS_PCH_SPLIT(dev))
-		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
-	else if (IS_GM45(dev)) {
-		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
-	} else {
-		I915_WRITE(FBC_CFB_BASE, cfb_base);
-		I915_WRITE(FBC_LL_BASE, ll_base);
-		dev_priv->compressed_llb = compressed_llb;
-	}
+	dev_priv->cfb_size = size;
 
-	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
-		      (long)cfb_base, (long)ll_base, size >> 20);
-	return;
+	return size;
 
-err_llb:
-	drm_mm_put_block(compressed_llb);
 err_fb:
 	drm_mm_put_block(compressed_fb);
 err:
 	dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
-	i915_warn_stolen(dev);
+	DRM_INFO("not enough stolen space for compressed buffer (need %d bytes), disabling\n", size);
+	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+	return 0;
+}
+
+int i915_gem_stolen_setup_compression(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node *node;
+	unsigned long hole_start, hole_end, size;
+
+	if (dev_priv->mm.stolen_base == 0)
+		return 0;
+
+	if (dev_priv->cfb_size)
+		return dev_priv->cfb_size;
+
+	/* Try to set up FBC with a reasonable compressed buffer size */
+	size = 0;
+	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
+		unsigned long hole_size = hole_end - hole_start;
+		if (hole_size > size)
+			size = hole_size;
+	}
+
+	/* Try to get a 32M buffer... */
+	if (size > (36*1024*1024))
+		size = 32*1024*1024;
+	else /* fall back to 3/4 of the stolen space */
+		size = size * 3 / 4;
+
+	return i915_setup_compression(dev, size);
 }
 
 static void i915_cleanup_compression(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	drm_mm_put_block(dev_priv->compressed_fb);
+	if (dev_priv->compressed_fb)
+		drm_mm_put_block(dev_priv->compressed_fb);
+
 	if (dev_priv->compressed_llb)
 		drm_mm_put_block(dev_priv->compressed_llb);
+
+	dev_priv->cfb_size = 0;
 }
 
 void i915_gem_cleanup_stolen(struct drm_device *dev)
 {
-	if (I915_HAS_FBC(dev) && i915_powersave)
-		i915_cleanup_compression(dev);
+	i915_cleanup_compression(dev);
 }
 
 int i915_gem_init_stolen(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
 
 	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
 	if (dev_priv->mm.stolen_base == 0)
@@ -180,21 +194,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
 		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
 
 	/* Basic memrange allocator for stolen space */
-	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
-
-	/* Try to set up FBC with a reasonable compressed buffer size */
-	if (I915_HAS_FBC(dev) && i915_powersave) {
-		int cfb_size;
-
-		/* Leave 1M for line length buffer & misc. */
-
-		/* Try to get a 32M buffer... */
-		if (prealloc_size > (36*1024*1024))
-			cfb_size = 32*1024*1024;
-		else /* fall back to 7/8 of the stolen space */
-			cfb_size = prealloc_size * 7 / 8;
-		i915_setup_compression(dev, cfb_size);
-	}
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 778cbb8..221d035 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7231,6 +7231,9 @@ void intel_modeset_init(struct drm_device *dev)
 	/* Just disable it once at startup */
 	i915_disable_vga(dev);
 	intel_setup_outputs(dev);
+
+	/* Just in case the BIOS is doing something questionable. */
+	intel_disable_fbc(dev);
 }
 
 void intel_modeset_gem_init(struct drm_device *dev)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 13/24] drm/i915: Defer allocation of stolen memory for FBC until first use
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (11 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-04 20:03 ` [PATCH 14/24] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

As FBC is commonly disabled due to chipset limitations on output
configurations, on many systems FBC is never enabled. For those
systems, it is advantageous to make use of the stolen memory for other
objects, and so we defer allocation of the FBC chunk until we actually
require it. This increases the likelihood of that allocation failing,
which in turn means that we are already taking advantage of the stolen
memory!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 36c6409..b4b1cce 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -440,12 +440,6 @@ void intel_update_fbc(struct drm_device *dev)
 		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
 		goto out_disable;
 	}
-	if (intel_fb->obj->base.size > dev_priv->cfb_size) {
-		DRM_DEBUG_KMS("framebuffer too large, disabling "
-			      "compression\n");
-		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
-		goto out_disable;
-	}
 	if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
 		DRM_DEBUG_KMS("mode incompatible with compression, "
@@ -479,6 +473,13 @@ void intel_update_fbc(struct drm_device *dev)
 	if (in_dbg_master())
 		goto out_disable;
 
+	if (intel_fb->obj->base.size > i915_gem_stolen_setup_compression(dev)) {
+		DRM_DEBUG_KMS("framebuffer too large, disabling "
+			      "compression\n");
+		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
+		goto out_disable;
+	}
+
 	/* If the scanout has not changed, don't modify the FBC settings.
 	 * Note that we make the fundamental assumption that the fb->obj
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 14/24] drm/i915: Allow objects to be created with no backing pages, but stolen space
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (12 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 13/24] drm/i915: Defer allocation of stolen memory for FBC until first use Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-04 20:03 ` [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects Chris Wilson
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

In order to accommodate objects that are not backed by struct pages, but
instead point into a contiguous region of stolen space, we need to make
various changes to avoid dereferencing obj->pages or obj->base.filp.

First introduce a marker for the stolen object that specifies its
offset into the stolen region and implies that it has no backing pages.
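
A minimal sketch of how later patches in the series key off the marker
(assuming the stolen_base field added earlier in the series):

	if (obj->stolen) {
		/* Contiguous stolen backing: the linear address follows
		 * directly from the stolen base plus the block offset. */
		char *vaddr = (char *)dev_priv->mm.stolen_base
			    + obj->stolen->start + offset;
		/* operate on vaddr directly */
	} else {
		/* Ordinary shmemfs-backed object: walk the obj->pages
		 * scatterlist and kmap each struct page as before. */
	}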

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    2 ++
 drivers/gpu/drm/i915/i915_drv.h     |    2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 274a328..af342e0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -125,6 +125,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->gtt_space != NULL)
 		seq_printf(m, " (gtt offset: %08x, size: %08x)",
 			   obj->gtt_offset, (unsigned int)obj->gtt_space->size);
+	if (obj->stolen)
+		seq_printf(m, " (stolen: %08lx)", obj->stolen->start);
 	if (obj->pin_mappable || obj->fault_mappable) {
 		char s[3], *t = s;
 		if (obj->pin_mappable)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 31d3a9f..637babb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -913,6 +913,8 @@ struct drm_i915_gem_object {
 
 	/** Current space allocated to this object in the GTT, if any. */
 	struct drm_mm_node *gtt_space;
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
 	struct list_head gtt_list;
 
 	/** This object's place on the active/inactive lists */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (13 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 14/24] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:50   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 16/24] drm/i915: Support readback of stolen objects upon error Chris Wilson
                   ` (8 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Stolen objects also share the property that they have no backing shmemfs
filp, but they can be used with pwrite/pread/gtt-mapping.
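
The practical effect on the ioctl guards, sketched: a missing filp no
longer implies an unreadable object, so only prime imports are rejected
and stolen objects fall through to the handlers added in the following
patches:

	if (i915_gem_object_is_prime(obj)) {	/* import_attach != NULL */
		ret = -EINVAL;	/* no backing pages of our own to copy */
		goto out;
	}
	/* obj->stolen and shmemfs objects continue to pread/pwrite */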

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |    5 +++++
 drivers/gpu/drm/i915/i915_gem.c |    4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 637babb..cc3cc4f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1049,6 +1049,11 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+static inline bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
+{
+	return obj->base.import_attach != NULL;
+}
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 58075e3..f1cef1f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -553,7 +553,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	/* prime objects have no backing filp to GEM pread/pwrite
 	 * pages from.
 	 */
-	if (!obj->base.filp) {
+	if (i915_gem_object_is_prime(obj)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -902,7 +902,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	/* prime objects have no backing filp to GEM pread/pwrite
 	 * pages from.
 	 */
-	if (!obj->base.filp) {
+	if (i915_gem_object_is_prime(obj)) {
 		ret = -EINVAL;
 		goto out;
 	}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 16/24] drm/i915: Support readback of stolen objects upon error
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (14 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:51   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 17/24] drm/i915: Handle stolen objects in pwrite Chris Wilson
                   ` (7 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index dd49046..fe3f60c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -923,6 +923,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 						     reloc_offset);
 			memcpy_fromio(d, s, PAGE_SIZE);
 			io_mapping_unmap_atomic(s);
+		} else if (src->stolen) {
+			unsigned long offset;
+
+			offset = dev_priv->mm.stolen_base;
+			offset += src->stolen->start;
+			offset += i << PAGE_SHIFT;
+
+			memcpy_fromio(d, (void *)offset, PAGE_SIZE);
 		} else {
 			struct page *page;
 			void *s;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 17/24] drm/i915: Handle stolen objects in pwrite
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (15 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 16/24] drm/i915: Support readback of stolen objects upon error Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-04 20:03 ` [PATCH 18/24] drm/i915: Handle stolen objects for pread Chris Wilson
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  159 ++++++++++++++++++++++++---------------
 1 file changed, 100 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f1cef1f..7946d73 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -664,19 +664,17 @@ out:
  * needs_clflush_before is set and flushes out any written cachelines after
  * writing if needs_clflush is set. */
 static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
 	if (unlikely(page_do_bit17_swizzling))
 		return -EINVAL;
 
-	vaddr = kmap_atomic(page);
 	if (needs_clflush_before)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
@@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 	if (needs_clflush_after)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
-	kunmap_atomic(vaddr);
 
 	return ret ? -EFAULT : 0;
 }
@@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
 		  bool needs_clflush_after)
 {
-	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
@@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
 					     page_do_bit17_swizzling);
-	kunmap(page);
 
 	return ret ? -EFAULT : 0;
 }
@@ -731,10 +725,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		      struct drm_i915_gem_pwrite *args,
 		      struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	int shmem_page_offset, page_length, ret = 0;
+	int page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
@@ -770,74 +765,120 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
-		int partial_cacheline_write;
+	if (obj->stolen) {
+		char *vaddr;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+		vaddr = (char *)dev_priv->mm.stolen_base;
+		vaddr += obj->stolen->start + offset;
 
-		if (remain <= 0)
-			break;
+		offset = offset_in_page(offset);
+		while (remain > 0) {
+			int partial_cacheline_write;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
+			page_length = remain;
+			if ((offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - offset;
 
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire page. */
+			partial_cacheline_write = needs_clflush_before &&
+				((offset | page_length) & (boot_cpu_data.x86_clflush_size - 1));
 
-		/* If we don't overwrite a cacheline completely we need to be
-		 * careful to have up-to-date data by first clflushing. Don't
-		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
-			((shmem_page_offset | page_length)
-				& (boot_cpu_data.x86_clflush_size - 1));
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				((uintptr_t)vaddr & (1 << 17)) != 0;
 
-		page = sg_page(sg);
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
+			ret = shmem_pwrite_fast(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
-		if (ret == 0)
-			goto next_page;
+			if (ret == 0)
+				goto next_stolen;
 
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
-		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush_after);
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
 
-		mutex_lock(&dev->struct_mutex);
+			ret = shmem_pwrite_slow(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
 
-next_page:
-		set_page_dirty(page);
-		mark_page_accessed(page);
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out;
 
-		if (ret)
-			goto out;
+next_stolen:
+			remain -= page_length;
+			user_data += page_length;
+			offset = 0;
+		}
+	} else {
+		i915_gem_object_pin_pages(obj);
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+		offset = offset_in_page(offset);
+		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+			struct page *page;
+			char *vaddr;
+			int partial_cacheline_write;
+
+			if (i < args->offset >> PAGE_SHIFT)
+				continue;
+
+			if (remain <= 0)
+				break;
+
+			page_length = remain;
+			if ((offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - offset;
+
+			/* If we don't overwrite a cacheline completely we need to be
+			 * careful to have up-to-date data by first clflushing. Don't
+			 * overcomplicate things and flush the entire page. */
+			partial_cacheline_write = needs_clflush_before &&
+				((offset | page_length) & (boot_cpu_data.x86_clflush_size - 1));
+
+			page = sg_page(sg);
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				(page_to_phys(page) & (1 << 17)) != 0;
+
+			vaddr = kmap_atomic(page);
+			ret = shmem_pwrite_fast(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+			kunmap_atomic(vaddr);
+
+			if (ret == 0)
+				goto next_page;
+
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
+
+			vaddr = kmap(page);
+			ret = shmem_pwrite_slow(vaddr, offset, page_length,
+						user_data, page_do_bit17_swizzling,
+						partial_cacheline_write,
+						needs_clflush_after);
+			kunmap(page);
+
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out_unpin;
+
+next_page:
+			remain -= page_length;
+			user_data += page_length;
+			offset = 0;
+		}
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
-
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 18/24] drm/i915: Handle stolen objects for pread
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (16 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 17/24] drm/i915: Handle stolen objects in pwrite Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-09-04 20:03 ` [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  175 ++++++++++++++++++++++++++-------------
 1 file changed, 116 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7946d73..070ddf2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -324,24 +324,21 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * Flushes invalid cachelines before reading the target if
  * needs_clflush is set. */
 static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_fast(char *vaddr, int shmem_page_offset, int page_length,
 		 char __user *user_data,
 		 bool page_do_bit17_swizzling, bool needs_clflush)
 {
-	char *vaddr;
 	int ret;
 
 	if (unlikely(page_do_bit17_swizzling))
 		return -EINVAL;
 
-	vaddr = kmap_atomic(page);
 	if (needs_clflush)
 		drm_clflush_virt_range(vaddr + shmem_page_offset,
 				       page_length);
 	ret = __copy_to_user_inatomic(user_data,
 				      vaddr + shmem_page_offset,
 				      page_length);
-	kunmap_atomic(vaddr);
 
 	return ret ? -EFAULT : 0;
 }
@@ -371,14 +368,12 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(char *vaddr, int shmem_page_offset, int page_length,
 		 char __user *user_data,
 		 bool page_do_bit17_swizzling, bool needs_clflush)
 {
-	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
 	if (needs_clflush)
 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 					     page_length,
@@ -392,7 +387,6 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 		ret = __copy_to_user(user_data,
 				     vaddr + shmem_page_offset,
 				     page_length);
-	kunmap(page);
 
 	return ret ? - EFAULT : 0;
 }
@@ -403,6 +397,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_pread *args,
 		     struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	char __user *user_data;
 	ssize_t remain;
 	loff_t offset;
@@ -433,76 +428,138 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		}
 	}
 
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
+	offset = args->offset;
 
-	i915_gem_object_pin_pages(obj);
+	if (obj->stolen) {
+		char *vaddr;
 
-	offset = args->offset;
+		vaddr = (char *)dev_priv->mm.stolen_base;
+		vaddr += obj->stolen->start + offset;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
+		shmem_page_offset = offset_in_page(offset);
+		while (remain > 0) {
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				((uintptr_t)vaddr & (1 << 17)) != 0;
 
-		if (remain <= 0)
-			break;
+			ret = shmem_pread_fast(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			if (ret == 0)
+				goto next_stolen;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
 
-		page = sg_page(sg);
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
+			if (!prefaulted) {
+				ret = fault_in_multipages_writeable(user_data, remain);
+				/* Userspace is tricking us, but we've already clobbered
+				 * its pages with the prefault and promised to write the
+				 * data up to the first fault. Hence ignore any errors
+				 * and just continue. */
+				(void)ret;
+				prefaulted = 1;
+			}
 
-		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
-		if (ret == 0)
-			goto next_page;
+			ret = shmem_pread_slow(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
 
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
+			mutex_lock(&dev->struct_mutex);
+			if (ret)
+				goto out;
 
-		if (!prefaulted) {
-			ret = fault_in_multipages_writeable(user_data, remain);
-			/* Userspace is tricking us, but we've already clobbered
-			 * its pages with the prefault and promised to write the
-			 * data up to the first fault. Hence ignore any errors
-			 * and just continue. */
-			(void)ret;
-			prefaulted = 1;
+next_stolen:
+			remain -= page_length;
+			user_data += page_length;
+			vaddr += page_length;
+			shmem_page_offset = 0;
 		}
+	} else {
+		ret = i915_gem_object_get_pages(obj);
+		if (ret)
+			return ret;
 
-		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
+		i915_gem_object_pin_pages(obj);
 
-		mutex_lock(&dev->struct_mutex);
+		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+			char *vaddr;
+			struct page *page;
 
-next_page:
-		mark_page_accessed(page);
+			if (i < offset >> PAGE_SHIFT)
+				continue;
 
-		if (ret)
-			goto out;
+			if (remain <= 0)
+				break;
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+			/* Operation in this page
+			 *
+			 * shmem_page_offset = offset within page in shmem file
+			 * page_length = bytes to copy for this page
+			 */
+			shmem_page_offset = offset_in_page(offset);
+			page_length = remain;
+			if ((shmem_page_offset + page_length) > PAGE_SIZE)
+				page_length = PAGE_SIZE - shmem_page_offset;
+
+			page = sg_page(sg);
+			mark_page_accessed(page);
+
+			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+				(page_to_phys(page) & (1 << 17)) != 0;
+
+			vaddr = kmap_atomic(page);
+			ret = shmem_pread_fast(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			kunmap_atomic(vaddr);
+
+			if (ret == 0)
+				goto next_page;
+
+			hit_slowpath = 1;
+			mutex_unlock(&dev->struct_mutex);
+
+			if (!prefaulted) {
+				ret = fault_in_multipages_writeable(user_data, remain);
+				/* Userspace is tricking us, but we've already clobbered
+				 * its pages with the prefault and promised to write the
+				 * data up to the first fault. Hence ignore any errors
+				 * and just continue. */
+				(void)ret;
+				prefaulted = 1;
+			}
+
+			vaddr = kmap(page);
+			ret = shmem_pread_slow(vaddr, shmem_page_offset, page_length,
+					       user_data, page_do_bit17_swizzling,
+					       needs_clflush);
+			kunmap(page);
+
+			mutex_lock(&dev->struct_mutex);
+
+			if (ret)
+				goto out_unpin;
+
+next_page:
+			remain -= page_length;
+			user_data += page_length;
+			offset += page_length;
+		}
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
 	}
 
-out:
-	i915_gem_object_unpin_pages(obj);
 
+out:
 	if (hit_slowpath) {
 		/* Fixup: Kill any reinstated backing storage pages */
 		if (obj->madv == __I915_MADV_PURGED)
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (17 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 18/24] drm/i915: Handle stolen objects for pread Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:53   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory Chris Wilson
                   ` (4 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Allow for the creation of GEM objects backed by stolen memory. As these
are not backed by ordinary struct pages, we wrap the contiguous physical
allocation with a fake dma mapping and store the address in a
single-entry scatterlist rather than in real backing pages.
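
For context, the intended usage pattern (as the later patches in this
series adopt it) is to try stolen memory first and fall back to a
regular shmem-backed object:

	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_stolen(dev, size);
	if (obj == NULL)
		obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;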

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h        |    3 +
 drivers/gpu/drm/i915/i915_gem.c        |    1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c |  122 ++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc3cc4f..f19a4f2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1500,6 +1500,9 @@ int i915_gem_evict_everything(struct drm_device *dev);
 int i915_gem_init_stolen(struct drm_device *dev);
 int i915_gem_stolen_setup_compression(struct drm_device *dev);
 void i915_gem_cleanup_stolen(struct drm_device *dev);
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_device *dev, u32 size);
+void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 070ddf2..2c04ea4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3847,6 +3847,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	obj->pages_pin_count = 0;
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
+	i915_gem_object_release_stolen(obj);
 
 	BUG_ON(obj->pages);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 17119d7..d91f6eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -198,3 +198,125 @@ int i915_gem_init_stolen(struct drm_device *dev)
 
 	return 0;
 }
+
+static struct sg_table *
+i915_pages_create_for_stolen(struct drm_device *dev,
+			     u32 offset, u32 size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct sg_table *st;
+	struct scatterlist *sg;
+
+	/* We hide that we have no struct page backing our stolen object
+	 * by wrapping the contiguous physical allocation with a fake
+	 * dma mapping in a single scatterlist.
+	 */
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return NULL;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return NULL;
+	}
+
+	sg = st->sgl;
+	sg->offset = offset;
+	sg->length = size;
+
+	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
+	sg_dma_len(sg) = size;
+
+	return st;
+}
+
+static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+{
+	BUG();
+	return -EINVAL;
+}
+
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
+{
+	/* Should only be called during free */
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
+	.get_pages = i915_gem_object_get_pages_stolen,
+	.put_pages = i915_gem_object_put_pages_stolen,
+};
+
+struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct drm_device *dev,
+			       struct drm_mm_node *stolen)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (obj == NULL)
+		return NULL;
+
+	if (drm_gem_private_object_init(dev, &obj->base, stolen->size))
+		goto cleanup;
+
+	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
+
+	obj->pages = i915_pages_create_for_stolen(dev,
+						  stolen->start, stolen->size);
+	if (obj->pages == NULL)
+		goto cleanup;
+
+	obj->has_dma_mapping = true;
+	obj->pages_pin_count = 1;
+	obj->stolen = stolen;
+
+	obj->base.write_domain = I915_GEM_DOMAIN_GTT;
+	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+	obj->cache_level = I915_CACHE_NONE;
+
+	return obj;
+
+cleanup:
+	kfree(obj);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+
+	if (dev_priv->mm.stolen_base == 0)
+		return NULL;
+
+	DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
+	if (size == 0)
+		return NULL;
+
+	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
+	if (stolen)
+		stolen = drm_mm_get_block(stolen, size, 4096);
+	if (stolen == NULL)
+		return NULL;
+
+	obj = _i915_gem_object_create_stolen(dev, stolen);
+	if (obj)
+		return obj;
+
+	drm_mm_put_block(stolen);
+	return NULL;
+}
+
+void
+i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
+{
+	if (obj->stolen) {
+		drm_mm_put_block(obj->stolen);
+		obj->stolen = NULL;
+	}
+}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (18 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:54   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 21/24] drm/i915: Allocate ringbuffers " Chris Wilson
                   ` (3 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_fb.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 97f6735..9de9cd9 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -84,7 +84,9 @@ static int intelfb_create(struct intel_fbdev *ifbdev,
 
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = ALIGN(size, PAGE_SIZE);
-	obj = i915_gem_alloc_object(dev, size);
+	obj = i915_gem_object_create_stolen(dev, size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, size);
 	if (!obj) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = -ENOMEM;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 21/24] drm/i915: Allocate ringbuffers from stolen memory
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (19 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:54   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 22/24] drm/i915: Allocate overlay registers " Chris Wilson
                   ` (2 subsequent siblings)
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 984a0c5..577a96a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1096,7 +1096,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 			return ret;
 	}
 
-	obj = i915_gem_alloc_object(dev, ring->size);
+	obj = NULL;
+	if (!HAS_LLC(dev))
+		obj = i915_gem_object_create_stolen(dev, ring->size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, ring->size);
 	if (obj == NULL) {
 		DRM_ERROR("Failed to allocate ringbuffer\n");
 		ret = -ENOMEM;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 22/24] drm/i915: Allocate overlay registers from stolen memory
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (20 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 21/24] drm/i915: Allocate ringbuffers " Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:55   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 23/24] drm/i915: Use a slab for object allocation Chris Wilson
  2012-09-04 20:03 ` [PATCH 24/24] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_overlay.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index afd0f30..2fa20a4 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1368,8 +1368,10 @@ void intel_setup_overlay(struct drm_device *dev)
 
 	overlay->dev = dev;
 
-	reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
-	if (!reg_bo)
+	reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
+	if (reg_bo == NULL)
+		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
+	if (reg_bo == NULL)
 		goto out_free;
 	overlay->reg_bo = reg_bo;
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 23/24] drm/i915: Use a slab for object allocation
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (21 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 22/24] drm/i915: Allocate overlay registers " Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  2012-10-11 18:55   ` Jesse Barnes
  2012-09-04 20:03 ` [PATCH 24/24] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  23 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

The primary purpose of this was to debug some use-after-free memory
corruption that was causing an oops inside drm/i915. As it turned out,
the corruption was being caused elsewhere, but i915.ko, as a major user
of many objects, was being hit hardest.

Indeed, as we make frequent use of the generic kmalloc caches, dedicating
one to ourselves (or at least naming one for us, depending upon the slab
core) aids debugging of our own slab usage.
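
As a minimal sketch of the pattern (names as in the patch below; a slab
core that merges compatible caches may only give us the naming, as noted
above):

	dev_priv->slab =
		kmem_cache_create("i915_gem_object",
				  sizeof(struct drm_i915_gem_object), 0,
				  SLAB_HWCACHE_ALIGN, NULL);

	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
	/* ... object lives its life ... */
	kmem_cache_free(dev_priv->slab, obj);

With an unmerged cache, our objects then show up as their own
"i915_gem_object" row in /proc/slabinfo instead of hiding inside the
generic kmalloc buckets.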

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c        |    3 +++
 drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
 drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
 drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
 5 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2c09900..f2e3439 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1760,6 +1760,9 @@ int i915_driver_unload(struct drm_device *dev)
 
 	destroy_workqueue(dev_priv->wq);
 
+	if (dev_priv->slab)
+		kmem_cache_destroy(dev_priv->slab);
+
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev->dev_private);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f19a4f2..ec8c0fc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -391,6 +391,7 @@ struct intel_gmbus {
 
 typedef struct drm_i915_private {
 	struct drm_device *dev;
+	struct kmem_cache *slab;
 
 	const struct intel_device_info *info;
 
@@ -1316,12 +1317,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
+void *i915_gem_object_alloc(struct drm_device *dev);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
 int i915_gem_init_object(struct drm_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2c04ea4..a32d3eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	kmem_cache_free(dev_priv->slab, obj);
+}
+
 static int
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
@@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
 	if (ret) {
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return ret;
 	}
 
@@ -3770,12 +3782,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	struct address_space *mapping;
 	u32 mask;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
 	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return NULL;
 	}
 
@@ -3858,7 +3870,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
 	kfree(obj->bit_17);
-	kfree(obj);
+	i915_gem_object_free(obj);
 }
 
 int
@@ -4236,8 +4248,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-	int i;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	dev_priv->slab =
+		kmem_cache_create("i915_gem_object",
+				  sizeof(struct drm_i915_gem_object), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index ca3497e..f307e31 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	if (IS_ERR(attach))
 		return ERR_CAST(attach);
 
-
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL) {
 		ret = -ENOMEM;
 		goto fail_detach;
@@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 
 	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	if (ret) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		goto fail_detach;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index d91f6eb..fc9228a 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -255,7 +255,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 {
 	struct drm_i915_gem_object *obj;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
@@ -280,7 +280,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	return obj;
 
 cleanup:
-	kfree(obj);
+	i915_gem_object_free(obj);
 	return NULL;
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* [PATCH 24/24] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
  2012-09-04 20:02 Stolen memory, again Chris Wilson
                   ` (22 preceding siblings ...)
  2012-09-04 20:03 ` [PATCH 23/24] drm/i915: Use a slab for object allocation Chris Wilson
@ 2012-09-04 20:03 ` Chris Wilson
  23 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-04 20:03 UTC (permalink / raw)
  To: intel-gfx

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of partial software fallbacks (xterm!) to faster
pipelining of texture data (such as pixel buffer objects in GL).
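
A hedged sketch of how userspace would drive the new ioctl (using the
structure and request number defined below; the wrapper name is made up
for illustration, and note the ioctl is registered DRM_MASTER):

	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>		/* drmIoctl(), from libdrm */
	#include <drm/i915_drm.h>	/* with this patch applied */

	static uint32_t wrap_user_pages(int fd, void *ptr, uint32_t size)
	{
		struct drm_i915_gem_userptr arg;

		memset(&arg, 0, sizeof(arg));
		arg.user_ptr = (uintptr_t)ptr;	/* page-aligned allocation */
		arg.user_size = size;
		arg.flags = 0;			/* or I915_USERPTR_READ_ONLY */

		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
			return 0;		/* handles are nonzero */
		return arg.handle;		/* an ordinary GEM handle */
	}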

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   21 +++
 drivers/gpu/drm/i915/i915_gem.c         |    9 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  273 +++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                  |   15 ++
 6 files changed, 317 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0f2c549..754d665 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  intel_display.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f2e3439..837fc63 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1883,6 +1883,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_MASTER|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ec8c0fc..b9dc7f9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -905,6 +906,7 @@ enum i915_cache_level {
 struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1050,6 +1052,23 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+
+	struct mm_struct *mm;
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier mmu_notifier;
+#endif
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
 {
 	return obj->base.import_attach != NULL;
@@ -1308,6 +1327,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a32d3eb..770ea6e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2556,9 +2556,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -3074,7 +3074,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset += obj->gtt_space->start;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
@@ -3866,6 +3866,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4253,7 +4256,7 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..94bf09e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct i915_gem_userptr_object *vmap;
+
+	vmap = container_of(mn, struct i915_gem_userptr_object, mmu_notifier);
+	BUG_ON(vmap->mm != mm);
+	vmap->mm = NULL;
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	if (vmap->mm) {
+		mmu_notifier_unregister(&vmap->mmu_notifier, vmap->mm);
+		BUG_ON(vmap->mm);
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	vmap->mmu_notifier.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&vmap->mmu_notifier, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (vmap->mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (vmap->mm == current->mm)
+		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+					       !vmap->read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = vmap->mm;
+		ret = 0;
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		if (vmap->mm != NULL)
+			ret = get_user_pages(current, mm,
+					     vmap->user_ptr + (pinned << PAGE_SHIFT),
+					     num_pages - pinned,
+					     !vmap->read_only, 0,
+					     pvec + pinned,
+					     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+	i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG;
+
+	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+					   args->user_size);
+	if (ret)
+		return ret;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 05e24d3..0a0b881 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHEING	0x2f
 #define DRM_I915_GEM_GET_CACHEING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -255,6 +256,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -474,6 +476,19 @@ struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops
  2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
@ 2012-09-06 22:32   ` Ben Widawsky
  2012-10-11 18:28   ` Jesse Barnes
  1 sibling, 0 replies; 55+ messages in thread
From: Ben Widawsky @ 2012-09-06 22:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:53 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> In order to specialise functions depending upon the type of object, we
> can attach vfuncs to each object via a new ->ops pointer.
> 
> For instance, this will be used in future patches to only bind pages from
> a dma-buf for the duration that the object is used by the GPU - and so
> prevent them from pinning those pages for the entire of the object.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
With comments below (addressed or not) this is:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_drv.h        |   12 +++++-
>  drivers/gpu/drm/i915/i915_gem.c        |   71 +++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    4 +-
>  3 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f16ab5e..f180874 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -898,9 +898,16 @@ enum i915_cache_level {
>  	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
>  };
>  
> +struct drm_i915_gem_object_ops {
> +	int (*get_pages)(struct drm_i915_gem_object *);
> +	void (*put_pages)(struct drm_i915_gem_object *);
> +};
> +

For the sake of "namespace" can we name this gem_get_pages() or
something similar?

I would love comments describing what get/put should do for when people
other than you want to add new ops. If you'd prefer to put them in the
definitions of the get/put functions, that's even better.
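
E.g., a sketch of the kind of kerneldoc I have in mind (wording is mine,
not from the patch):

	/**
	 * struct drm_i915_gem_object_ops - backing storage hooks
	 * @get_pages: populate obj->pages with the object's backing
	 *             storage; called under struct_mutex when the pages
	 *             are first needed (e.g. at bind time).
	 * @put_pages: release obj->pages; only called once there are no
	 *             more users of the pages and the object is unbound.
	 */
	struct drm_i915_gem_object_ops {
		int (*get_pages)(struct drm_i915_gem_object *);
		void (*put_pages)(struct drm_i915_gem_object *);
	};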

>  struct drm_i915_gem_object {
>  	struct drm_gem_object base;
>  
> +	const struct drm_i915_gem_object_ops *ops;
> +
>  	/** Current space allocated to this object in the GTT, if any. */
>  	struct drm_mm_node *gtt_space;
>  	struct list_head gtt_list;
> @@ -1305,7 +1312,8 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
>  int i915_gem_init_object(struct drm_gem_object *obj);
> -void i915_gem_object_init(struct drm_i915_gem_object *obj);
> +void i915_gem_object_init(struct drm_i915_gem_object *obj,
> +			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> @@ -1318,7 +1326,7 @@ int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>  void i915_gem_lastclose(struct drm_device *dev);
>  
> -int __must_check i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj);
> +int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  			 struct intel_ring_buffer *to);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 87a64e5..66fbd9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1650,18 +1650,12 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
>  	return obj->madv == I915_MADV_DONTNEED;
>  }
>  
> -static int
> +static void
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	int page_count = obj->base.size / PAGE_SIZE;
>  	int ret, i;
>  
> -	BUG_ON(obj->gtt_space);
> -
> -	if (obj->pages == NULL)
> -		return 0;
> -
> -	BUG_ON(obj->gtt_space);
>  	BUG_ON(obj->madv == __I915_MADV_PURGED);
>  
>  	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> @@ -1693,9 +1687,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  
>  	drm_free_large(obj->pages);
>  	obj->pages = NULL;
> +}
>  
> -	list_del(&obj->gtt_list);
> +static int
> +i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +
> +	if (obj->sg_table || obj->pages == NULL)
> +		return 0;
> +
> +	BUG_ON(obj->gtt_space);
>  
> +	ops->put_pages(obj);
> +
> +	list_del(&obj->gtt_list);
>  	if (i915_gem_object_is_purgeable(obj))
>  		i915_gem_object_truncate(obj);
>  
> @@ -1712,7 +1718,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
>  				 &dev_priv->mm.unbound_list,
>  				 gtt_list) {
>  		if (i915_gem_object_is_purgeable(obj) &&
> -		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +		    i915_gem_object_put_pages(obj) == 0) {
>  			count += obj->base.size >> PAGE_SHIFT;
>  			if (count >= target)
>  				return count;
> @@ -1724,7 +1730,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
>  				 mm_list) {
>  		if (i915_gem_object_is_purgeable(obj) &&
>  		    i915_gem_object_unbind(obj) == 0 &&
> -		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +		    i915_gem_object_put_pages(obj) == 0) {
>  			count += obj->base.size >> PAGE_SHIFT;
>  			if (count >= target)
>  				return count;
> @@ -1742,10 +1748,10 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
>  	i915_gem_evict_everything(dev_priv->dev);
>  
>  	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
> -		i915_gem_object_put_pages_gtt(obj);
> +		i915_gem_object_put_pages(obj);
>  }
>  
> -int
> +static int
>  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> @@ -1754,9 +1760,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	struct page *page;
>  	gfp_t gfp;
>  
> -	if (obj->pages || obj->sg_table)
> -		return 0;
> -
>  	/* Assert that the object is not currently in any GPU domain. As it
>  	 * wasn't in the GTT, there shouldn't be any way it could have been in
>  	 * a GPU cache
> @@ -1806,7 +1809,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj);
>  
> -	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
>  	return 0;
>  
>  err_pages:
> @@ -1818,6 +1820,24 @@ err_pages:
>  	return PTR_ERR(page);
>  }
>  
> +int
> +i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +	int ret;
> +

	BUG_ON(!obj->gtt_space)?

> +	if (obj->sg_table || obj->pages)
> +		return 0;
> +
> +	ret = ops->get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
> +	return 0;
> +}
> +
>  void
>  i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  			       struct intel_ring_buffer *ring,
> @@ -2071,7 +2091,6 @@ void i915_gem_reset(struct drm_device *dev)
>  		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
>  	}
>  
> -
>  	/* The fence registers are invalidated so clear them out */
>  	i915_gem_reset_fences(dev);
>  }
> @@ -2871,7 +2890,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  		return -E2BIG;
>  	}
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret)
>  		return ret;
>  
> @@ -3610,15 +3629,16 @@ unlock:
>  	return ret;
>  }
>  
> -void i915_gem_object_init(struct drm_i915_gem_object *obj)
> +void i915_gem_object_init(struct drm_i915_gem_object *obj,
> +			  const struct drm_i915_gem_object_ops *ops)
>  {
> -	obj->base.driver_private = NULL;
> -
>  	INIT_LIST_HEAD(&obj->mm_list);
>  	INIT_LIST_HEAD(&obj->gtt_list);
>  	INIT_LIST_HEAD(&obj->ring_list);
>  	INIT_LIST_HEAD(&obj->exec_list);
>  
> +	obj->ops = ops;
> +
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
>  	/* Avoid an unnecessary call to unbind on the first bind. */
> @@ -3627,6 +3647,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj)
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
>  
> +static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
> +	.get_pages = i915_gem_object_get_pages_gtt,
> +	.put_pages = i915_gem_object_put_pages_gtt,
> +};
> +
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size)
>  {
> @@ -3653,7 +3678,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	mapping_set_gfp_mask(mapping, mask);
>  
> -	i915_gem_object_init(obj);
> +	i915_gem_object_init(obj, &i915_gem_object_ops);
>  
>  	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>  	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> @@ -3711,7 +3736,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		dev_priv->mm.interruptible = was_interruptible;
>  	}
>  
> -	i915_gem_object_put_pages_gtt(obj);
> +	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  
>  	drm_gem_object_release(&obj->base);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 43c9530..e4f1141 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -41,7 +41,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
>  		sg = ERR_PTR(ret);
>  		goto out;
> @@ -89,7 +89,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  		goto out_unlock;
>  	}
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
>  		mutex_unlock(&dev->struct_mutex);
>  		return ERR_PTR(ret);


-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap
  2012-09-04 20:02 ` [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap Chris Wilson
@ 2012-09-06 22:55   ` Ben Widawsky
  2012-10-11 18:30   ` Jesse Barnes
  1 sibling, 0 replies; 55+ messages in thread
From: Ben Widawsky @ 2012-09-06 22:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:54 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> We need to refcount our pages in order to prevent reaping them at
> inopportune times, such as when they currently vmapped or exported to
> another driver. However, we also wish to keep the lazy deallocation of
> our pages so we need to take a pin/unpinned approach rather than a
> simple refcount.

I've not followed the dmabuf development much, but is there no interface
to map partial objects, i.e. have some pages of an object pinned, but not
all?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

With comment below addressed or not it's:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_drv.h        |   12 ++++++++++++
>  drivers/gpu/drm/i915/i915_gem.c        |   11 +++++++++--
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    8 ++++++--
>  3 files changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f180874..0747472 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -994,6 +994,7 @@ struct drm_i915_gem_object {
>  	unsigned int has_global_gtt_mapping:1;
>  
>  	struct page **pages;
> +	int pages_pin_count;
>  
>  	/**
>  	 * DMAR support
> @@ -1327,6 +1328,17 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>  void i915_gem_lastclose(struct drm_device *dev);
>  
>  int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +{
> +	BUG_ON(obj->pages == NULL);
> +	obj->pages_pin_count++;
> +}
> +static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> +{
> +	BUG_ON(obj->pages_pin_count == 0);
> +	obj->pages_pin_count--;
> +}
> +

Big fan of BUG_ON(!mutex_is_locked()) here.
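
i.e. something like (sketch):

	static inline void
	i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
	{
		BUG_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
		BUG_ON(obj->pages == NULL);
		obj->pages_pin_count++;
	}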

>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  			 struct intel_ring_buffer *to);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 66fbd9f..aa088ef 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1699,6 +1699,9 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>  
>  	BUG_ON(obj->gtt_space);
>  
> +	if (obj->pages_pin_count)
> +		return -EBUSY;
> +
>  	ops->put_pages(obj);
>  
>  	list_del(&obj->gtt_list);
> @@ -1830,6 +1833,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  	if (obj->sg_table || obj->pages)
>  		return 0;
>  
> +	BUG_ON(obj->pages_pin_count);
> +
>  	ret = ops->get_pages(obj);
>  	if (ret)
>  		return ret;
> @@ -3736,6 +3741,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		dev_priv->mm.interruptible = was_interruptible;
>  	}
>  
> +	obj->pages_pin_count = 0;
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  
> @@ -4395,9 +4401,10 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  
>  	cnt = 0;
>  	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
> -		cnt += obj->base.size >> PAGE_SHIFT;
> +		if (obj->pages_pin_count == 0)
> +			cnt += obj->base.size >> PAGE_SHIFT;
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
> -		if (obj->pin_count == 0)
> +		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
>  			cnt += obj->base.size >> PAGE_SHIFT;
>  
>  	mutex_unlock(&dev->struct_mutex);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index e4f1141..eca4726 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -50,6 +50,8 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>  	/* link the pages into an SG then map the sg */
>  	sg = drm_prime_pages_to_sg(obj->pages, npages);
>  	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
> +	i915_gem_object_pin_pages(obj);
> +
>  out:
>  	mutex_unlock(&dev->struct_mutex);
>  	return sg;
> @@ -102,6 +104,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  	}
>  
>  	obj->vmapping_count = 1;
> +	i915_gem_object_pin_pages(obj);
>  out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return obj->dma_buf_vmapping;
> @@ -117,10 +120,11 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
>  	if (ret)
>  		return;
>  
> -	--obj->vmapping_count;
> -	if (obj->vmapping_count == 0) {
> +	if (--obj->vmapping_count == 0) {
>  		vunmap(obj->dma_buf_vmapping);
>  		obj->dma_buf_vmapping = NULL;
> +
> +		i915_gem_object_unpin_pages(obj);
>  	}
>  	mutex_unlock(&dev->struct_mutex);
>  }



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/24] drm/i915: Pin backing pages for pwrite
  2012-09-04 20:02 ` [PATCH 03/24] drm/i915: Pin backing pages for pwrite Chris Wilson
@ 2012-09-07  0:07   ` Ben Widawsky
  2012-09-12 13:13     ` Daniel Vetter
  2012-10-11 18:31   ` Jesse Barnes
  1 sibling, 1 reply; 55+ messages in thread
From: Ben Widawsky @ 2012-09-07  0:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:55 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> By using the recently introduced pinning of pages, we can safely drop
> the mutex in the knowledge that the pages are not going to disappear
> beneath us, and so we can simplify the code for iterating over the pages.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   37 +++++++++++++------------------------
>  1 file changed, 13 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index aa088ef..8a4eac0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -690,7 +690,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  				       page_length);
>  	kunmap_atomic(vaddr);
>  
> -	return ret;
> +	return ret ? -EFAULT : 0;
>  }
>  
>  /* Only difference to the fast-path function is that this can handle bit17
> @@ -724,7 +724,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>  					     page_do_bit17_swizzling);
>  	kunmap(page);
>  
> -	return ret;
> +	return ret ? -EFAULT : 0;
>  }
>  
>  static int
> @@ -733,7 +733,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		      struct drm_i915_gem_pwrite *args,
>  		      struct drm_file *file)
>  {
> -	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	ssize_t remain;
>  	loff_t offset;
>  	char __user *user_data;

Without digging too deeply to see if you looked already: it would be nice
if we could get a DRM_INFO or something for cases where the return isn't
actually -EFAULT.

> @@ -742,7 +741,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int needs_clflush_after = 0;
>  	int needs_clflush_before = 0;
> -	int release_page;
>  
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -768,6 +766,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	    && obj->cache_level == I915_CACHE_NONE)
>  		needs_clflush_before = 1;
>  
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_pin_pages(obj);
> +
>  	offset = args->offset;
>  	obj->dirty = 1;
>  
> @@ -793,18 +797,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			((shmem_page_offset | page_length)
>  				& (boot_cpu_data.x86_clflush_size - 1));
>  
> -		if (obj->pages) {
> -			page = obj->pages[offset >> PAGE_SHIFT];
> -			release_page = 0;
> -		} else {
> -			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
> -			if (IS_ERR(page)) {
> -				ret = PTR_ERR(page);
> -				goto out;
> -			}
> -			release_page = 1;
> -		}
> -
> +		page = obj->pages[offset >> PAGE_SHIFT];
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>  

So the obvious question is: what about the page caching? Can you add to
the commit message, for my edification, why previously the shmem page was
released from the page cache and now it isn't?

> @@ -816,26 +809,20 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			goto next_page;
>  
>  		hit_slowpath = 1;
> -		page_cache_get(page);
>  		mutex_unlock(&dev->struct_mutex);
> -
>  		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
>  					user_data, page_do_bit17_swizzling,
>  					partial_cacheline_write,
>  					needs_clflush_after);
>  
>  		mutex_lock(&dev->struct_mutex);
> -		page_cache_release(page);
> +
>  next_page:
>  		set_page_dirty(page);
>  		mark_page_accessed(page);
> -		if (release_page)
> -			page_cache_release(page);
>  
> -		if (ret) {
> -			ret = -EFAULT;
> +		if (ret)
>  			goto out;
> -		}
>  
>  		remain -= page_length;
>  		user_data += page_length;
> @@ -843,6 +830,8 @@ next_page:
>  	}
>  
>  out:
> +	i915_gem_object_unpin_pages(obj);
> +
>  	if (hit_slowpath) {
>  		/* Fixup: Kill any reinstated backing storage pages */
>  		if (obj->madv == __I915_MADV_PURGED)



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 04/24] drm/i915: Pin backing pages for pread
  2012-09-04 20:02 ` [PATCH 04/24] drm/i915: Pin backing pages for pread Chris Wilson
@ 2012-09-07  0:10   ` Ben Widawsky
  0 siblings, 0 replies; 55+ messages in thread
From: Ben Widawsky @ 2012-09-07  0:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:56 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> By using the recently introduced pinning of pages, we can safely drop
> the mutex in the knowledge that the pages are not going to disappear
> beneath us, and so we can simplify the code for iterating over the pages.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
If you fix patch 3 to explain, the explanation applies here too. The
write path is always scarier for me anyway. With patch 3 fixed, it's
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist
  2012-09-04 20:02 ` [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist Chris Wilson
@ 2012-09-07  1:49   ` Ben Widawsky
  2012-09-10 16:34     ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Ben Widawsky @ 2012-09-07  1:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:57 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Rather than have multiple data structures for describing our page layout
> in conjunction with the array of pages, we can migrate all users over to
> a scatterlist.
> 
> One major advantage, other than unifying the page tracking structures,
> this offers is that we replace the vmalloc'ed array (which can be up to
> a megabyte in size) with a chain of individual pages which helps reduce
> memory pressure.
> 
> The disadvantage is that we then do not have a simple array to iterate,
> or to access randomly. The common case for this is in the relocation
> processing, which will typically fit within a single scatterlist page
> and so be almost the same cost as the simple array. For iterating over
> the array, the extra function call could be optimised away, but in
> reality is an insignificant cost of either binding the pages, or
> performing the pwrite/pread.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>


Now that my eyes are done bleeding, easy ones:

ERROR: space required after that ',' (ctx:VxV)
#69: FILE: drivers/char/agp/intel-gtt.c:99:
+	for_each_sg(st->sgl, sg, num_entries,i)
 	                                    ^

WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#189: FILE: drivers/gpu/drm/drm_cache.c:117:
+		printk(KERN_ERR "Timed out waiting for cache flush.\n");

WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#191: FILE: drivers/gpu/drm/drm_cache.c:119:
+	printk(KERN_ERR "Architecture has no drm_cache.c support\n");
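
The fixups are mechanical, e.g.:

	for_each_sg(st->sgl, sg, num_entries, i)

	pr_err("Timed out waiting for cache flush.\n");
	pr_err("Architecture has no drm_cache.c support\n");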

In addition to the inline comments: it would have been slightly easier
to review without the s/page/i rename, since there seems to be no
compelling reason for it anyway.



> ---
>  drivers/char/agp/intel-gtt.c               |   51 +++++-------
>  drivers/gpu/drm/drm_cache.c                |   23 ++++++
>  drivers/gpu/drm/i915/i915_drv.h            |   18 +++--
>  drivers/gpu/drm/i915/i915_gem.c            |   79 ++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c     |   99 +++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |  121 ++++++----------------------
>  drivers/gpu/drm/i915/i915_gem_tiling.c     |   16 ++--
>  drivers/gpu/drm/i915/i915_irq.c            |   25 +++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |    9 ++-
>  include/drm/drmP.h                         |    1 +
>  include/drm/intel-gtt.h                    |   10 +--
>  12 files changed, 236 insertions(+), 219 deletions(-)
> 
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 58e32f7..511d1b1 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -84,40 +84,33 @@ static struct _intel_private {
>  #define IS_IRONLAKE	intel_private.driver->is_ironlake
>  #define HAS_PGTBL_EN	intel_private.driver->has_pgtbl_enable
>  
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> -			 struct scatterlist **sg_list, int *num_sg)
> +static int intel_gtt_map_memory(struct page **pages,
> +				unsigned int num_entries,
> +				struct sg_table *st)
>  {
> -	struct sg_table st;
>  	struct scatterlist *sg;
>  	int i;
>  
> -	if (*sg_list)
> -		return 0; /* already mapped (for e.g. resume */
> -
>  	DBG("try mapping %lu pages\n", (unsigned long)num_entries);
>  
> -	if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
> +	if (sg_alloc_table(st, num_entries, GFP_KERNEL))
>  		goto err;
>  
> -	*sg_list = sg = st.sgl;
> -
> -	for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
> +	for_each_sg(st->sgl, sg, num_entries,i)
>  		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
>  
> -	*num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
> -				 num_entries, PCI_DMA_BIDIRECTIONAL);
> -	if (unlikely(!*num_sg))
> +	if (!pci_map_sg(intel_private.pcidev,
> +			st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL))
>  		goto err;
>  
>  	return 0;
>  
>  err:
> -	sg_free_table(&st);
> +	sg_free_table(st);
>  	return -ENOMEM;
>  }
> -EXPORT_SYMBOL(intel_gtt_map_memory);
>  
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
> +static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
>  {
>  	struct sg_table st;
>  	DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
> @@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
>  
>  	sg_free_table(&st);
>  }
> -EXPORT_SYMBOL(intel_gtt_unmap_memory);
>  
>  static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
>  {
> @@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags)
>  	return false;
>  }
>  
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> -				 unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
>  				 unsigned int pg_start,
>  				 unsigned int flags)
>  {
> @@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
>  
>  	/* sg may merge pages, but we have to separate
>  	 * per-page addr for GTT */
> -	for_each_sg(sg_list, sg, sg_len, i) {
> +	for_each_sg(st->sgl, sg, st->nents, i) {
>  		len = sg_dma_len(sg) >> PAGE_SHIFT;
>  		for (m = 0; m < len; m++) {
>  			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> -			intel_private.driver->write_entry(addr,
> -							  j, flags);
> +			intel_private.driver->write_entry(addr, j, flags);
>  			j++;
>  		}
>  	}
> @@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
>  }
>  EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
>  
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> -			    struct page **pages, unsigned int flags)
> +static void intel_gtt_insert_pages(unsigned int first_entry,
> +				   unsigned int num_entries,
> +				   struct page **pages,
> +				   unsigned int flags)
>  {
>  	int i, j;
>  
> @@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
>  	}
>  	readl(intel_private.gtt+j-1);
>  }
> -EXPORT_SYMBOL(intel_gtt_insert_pages);
>  
>  static int intel_fake_agp_insert_entries(struct agp_memory *mem,
>  					 off_t pg_start, int type)
> @@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
>  		global_cache_flush();
>  
>  	if (intel_private.base.needs_dmar) {
> -		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
> -					   &mem->sg_list, &mem->num_sg);
> +		struct sg_table st;
> +
> +		ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
>  		if (ret != 0)
>  			return ret;
>  
> -		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
> -					    pg_start, type);
> +		intel_gtt_insert_sg_entries(&st, pg_start, type);
> +		mem->sg_list = st.sgl;
> +		mem->num_sg = st.nents;

Can you explain how the corresponding free for the sg_table gets called
here?

>  	} else
>  		intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
>  				       type);
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 08758e0..628a2e0 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -100,6 +100,29 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
>  EXPORT_SYMBOL(drm_clflush_pages);
>  
>  void
> +drm_clflush_sg(struct sg_table *st)
> +{
> +#if defined(CONFIG_X86)
> +	if (cpu_has_clflush) {
> +		struct scatterlist *sg;
> +		int i;
> +
> +		mb();
> +		for_each_sg(st->sgl, sg, st->nents, i)
> +			drm_clflush_page(sg_page(sg));
> +		mb();
> +	}
> +
> +	if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
> +		printk(KERN_ERR "Timed out waiting for cache flush.\n");
> +#else
> +	printk(KERN_ERR "Architecture has no drm_cache.c support\n");
> +	WARN_ON_ONCE(1);
> +#endif
> +}
> +EXPORT_SYMBOL(drm_clflush_sg);
> +
> +void
>  drm_clflush_virt_range(char *addr, unsigned long length)
>  {
>  #if defined(CONFIG_X86)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0747472..1a714fa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -992,16 +992,11 @@ struct drm_i915_gem_object {
>  
>  	unsigned int has_aliasing_ppgtt_mapping:1;
>  	unsigned int has_global_gtt_mapping:1;
> +	unsigned int has_dma_mapping:1;
>  
> -	struct page **pages;
> +	struct sg_table *pages;
>  	int pages_pin_count;
>  
> -	/**
> -	 * DMAR support
> -	 */
> -	struct scatterlist *sg_list;
> -	int num_sg;
> -
>  	/* prime dma-buf support */
>  	struct sg_table *sg_table;
>  	void *dma_buf_vmapping;
> @@ -1328,6 +1323,15 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>  void i915_gem_lastclose(struct drm_device *dev);
>  
>  int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> +{
> +	struct scatterlist *sg = obj->pages->sgl;
> +	while (n >= SG_MAX_SINGLE_ALLOC) {
> +		sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
> +		n -= SG_MAX_SINGLE_ALLOC - 1;
> +	}
> +	return sg_page(sg+n);
> +}
>  static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>  {
>  	BUG_ON(obj->pages == NULL);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 171bc51..06589a9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -411,6 +411,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int prefaulted = 0;
>  	int needs_clflush = 0;
> +	struct scatterlist *sg;
> +	int i;
>  
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -439,9 +441,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  
>  	offset = args->offset;
>  
> -	while (remain > 0) {
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
>  		struct page *page;
>  
> +		if (i < offset >> PAGE_SHIFT)
> +			continue;
> +
> +		if (remain <= 0)
> +			break;
> +
>  		/* Operation in this page
>  		 *
>  		 * shmem_page_offset = offset within page in shmem file
> @@ -452,7 +460,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>  		if ((shmem_page_offset + page_length) > PAGE_SIZE)
>  			page_length = PAGE_SIZE - shmem_page_offset;
>  
> -		page = obj->pages[offset >> PAGE_SHIFT];
> +		page = sg_page(sg);
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>  
> @@ -731,6 +739,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int needs_clflush_after = 0;
>  	int needs_clflush_before = 0;
> +	int i;
> +	struct scatterlist *sg;
>  
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -765,10 +775,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	offset = args->offset;
>  	obj->dirty = 1;
>  
> -	while (remain > 0) {
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
>  		struct page *page;
>  		int partial_cacheline_write;
>  
> +		if (i < offset >> PAGE_SHIFT)
> +			continue;
> +
> +		if (remain <= 0)
> +			break;
> +
>  		/* Operation in this page
>  		 *
>  		 * shmem_page_offset = offset within page in shmem file
> @@ -787,7 +803,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			((shmem_page_offset | page_length)
>  				& (boot_cpu_data.x86_clflush_size - 1));
>  
> -		page = obj->pages[offset >> PAGE_SHIFT];
> +		page = sg_page(sg);
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>  
> @@ -1633,6 +1649,7 @@ static void
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	int page_count = obj->base.size / PAGE_SIZE;
> +	struct scatterlist *sg;
>  	int ret, i;
>  
>  	BUG_ON(obj->madv == __I915_MADV_PURGED);
> @@ -1653,19 +1670,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  	if (obj->madv == I915_MADV_DONTNEED)
>  		obj->dirty = 0;
>  
> -	for (i = 0; i < page_count; i++) {
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +
>  		if (obj->dirty)
> -			set_page_dirty(obj->pages[i]);
> +			set_page_dirty(page);
>  
>  		if (obj->madv == I915_MADV_WILLNEED)
> -			mark_page_accessed(obj->pages[i]);
> +			mark_page_accessed(page);
>  
> -		page_cache_release(obj->pages[i]);
> +		page_cache_release(page);
>  	}
>  	obj->dirty = 0;
>  
> -	drm_free_large(obj->pages);
> -	obj->pages = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
>  }
>  
>  static int
> @@ -1682,6 +1701,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>  		return -EBUSY;
>  
>  	ops->put_pages(obj);
> +	obj->pages = NULL;
>  
>  	list_del(&obj->gtt_list);
>  	if (i915_gem_object_is_purgeable(obj))
> @@ -1739,6 +1759,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>  	int page_count, i;
>  	struct address_space *mapping;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
>  	struct page *page;
>  	gfp_t gfp;
>  
> @@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
>  	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
>  
> -	/* Get the list of pages out of our struct file.  They'll be pinned
> -	 * at this point until we release them.
> -	 */
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
>  	page_count = obj->base.size / PAGE_SIZE;
> -	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> -	if (obj->pages == NULL)
> +	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> +		sg_free_table(st);
> +		kfree(st);
>  		return -ENOMEM;
> +	}

I think the call here to sg_free_table is bogus.

>  
> -	/* Fail silently without starting the shrinker */
> +	/* Get the list of pages out of our struct file.  They'll be pinned
> +	 * at this point until we release them.
> +	 *
> +	 * Fail silently without starting the shrinker
> +	 */
>  	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	gfp = mapping_gfp_mask(mapping);
>  	gfp |= __GFP_NORETRY | __GFP_NOWARN;
>  	gfp &= ~(__GFP_IO | __GFP_WAIT);
> -	for (i = 0; i < page_count; i++) {
> +	for_each_sg(st->sgl, sg, page_count, i) {
>  		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
>  		if (IS_ERR(page)) {
>  			i915_gem_purge(dev_priv, page_count);
> @@ -1785,20 +1814,20 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  			gfp &= ~(__GFP_IO | __GFP_WAIT);
>  		}
>  
> -		obj->pages[i] = page;
> +		sg_set_page(sg, page, PAGE_SIZE, 0);
>  	}
>  
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj);
>  
> +	obj->pages = st;
>  	return 0;
>  
>  err_pages:
> -	while (i--)
> -		page_cache_release(obj->pages[i]);
> -
> -	drm_free_large(obj->pages);
> -	obj->pages = NULL;
> +	for_each_sg(st->sgl, sg, i, page_count)
> +		page_cache_release(sg_page(sg));
> +	sg_free_table(st);
> +	kfree(st);
>  	return PTR_ERR(page);
>  }
>  
> @@ -2974,7 +3003,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
>  
>  	trace_i915_gem_object_clflush(obj);
>  
> -	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
> +	drm_clflush_sg(obj->pages);
>  }
>  
>  /** Flushes the GTT write domain for the object if it's dirty. */
> @@ -3724,6 +3753,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  
> +	BUG_ON(obj->pages);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index eca4726..4bb1b94 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -28,33 +28,57 @@
>  #include <linux/dma-buf.h>
>  
>  static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
> -				      enum dma_data_direction dir)
> +					     enum dma_data_direction dir)
>  {
>  	struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
> -	struct drm_device *dev = obj->base.dev;
> -	int npages = obj->base.size / PAGE_SIZE;
> -	struct sg_table *sg;
> -	int ret;
> -	int nents;
> +	struct sg_table *st;
> +	struct scatterlist *src, *dst;
> +	int ret, i;
>  
> -	ret = i915_mutex_lock_interruptible(dev);
> +	ret = i915_mutex_lock_interruptible(obj->base.dev);
>  	if (ret)
>  		return ERR_PTR(ret);
>  
>  	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
> -		sg = ERR_PTR(ret);
> +		st = ERR_PTR(ret);
> +		goto out;
> +	}
> +
> +	/* Copy sg so that we make an independent mapping */
> +	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
> +	if (st == NULL) {
> +		st = ERR_PTR(-ENOMEM);
> +		goto out;
> +	}
> +
> +	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
> +	if (ret) {
> +		kfree(st);
> +		st = ERR_PTR(ret);
> +		goto out;
> +	}
> +
> +	src = obj->pages->sgl;
> +	dst = st->sgl;
> +	for (i = 0; i < obj->pages->nents; i++) {
> +		sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
> +		dst = sg_next(dst);
> +		src = sg_next(src);
> +	}
> +
> +	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
> +		sg_free_table(st);
> +		kfree(st);
> +		st = ERR_PTR(-ENOMEM);
>  		goto out;
>  	}
>  
> -	/* link the pages into an SG then map the sg */
> -	sg = drm_prime_pages_to_sg(obj->pages, npages);
> -	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
>  	i915_gem_object_pin_pages(obj);

<bikeshed>
I think the right way to go about this is to add drm_prime_pages_to_st
since you're pushing the whole sg_table-over-scatterlist thing; other
drivers can leverage it.
</bikeshed>

The lifetime description we discussed on IRC would have helped here as
well.

>  
>  out:
> -	mutex_unlock(&dev->struct_mutex);
> -	return sg;
> +	mutex_unlock(&obj->base.dev->struct_mutex);
> +	return st;
>  }
>  
>  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> @@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  {
>  	struct drm_i915_gem_object *obj = dma_buf->priv;
>  	struct drm_device *dev = obj->base.dev;
> -	int ret;
> +	struct scatterlist *sg;
> +	struct page **pages;
> +	int ret, i;
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> @@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  	}
>  
>  	ret = i915_gem_object_get_pages(obj);
> -	if (ret) {
> -		mutex_unlock(&dev->struct_mutex);
> -		return ERR_PTR(ret);
> -	}
> +	if (ret)
> +		goto error;
>  
> -	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
> -	if (!obj->dma_buf_vmapping) {
> -		DRM_ERROR("failed to vmap object\n");
> -		goto out_unlock;
> -	}
> +	ret = -ENOMEM;
> +
> +	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
> +	if (pages == NULL)
> +		goto error;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
> +		pages[i] = sg_page(sg);
> +
> +	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
> +	drm_free_large(pages);
> +
> +	if (!obj->dma_buf_vmapping)
> +		goto error;
>  
>  	obj->vmapping_count = 1;
>  	i915_gem_object_pin_pages(obj);
>  out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return obj->dma_buf_vmapping;
> +
> +error:
> +	mutex_unlock(&dev->struct_mutex);
> +	return ERR_PTR(ret);
>  }
>  
>  static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)

The return on vmap failing looks incorrect to me here. Also, I think
leaving the DRM_ERROR would have been nice.

> @@ -184,22 +221,19 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
>  };
>  
>  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
> -				struct drm_gem_object *gem_obj, int flags)
> +				      struct drm_gem_object *gem_obj, int flags)
>  {
>  	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
>  
> -	return dma_buf_export(obj, &i915_dmabuf_ops,
> -						  obj->base.size, 0600);
> +	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
>  }
>  
>  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> -				struct dma_buf *dma_buf)
> +					     struct dma_buf *dma_buf)
>  {
>  	struct dma_buf_attachment *attach;
>  	struct sg_table *sg;
>  	struct drm_i915_gem_object *obj;
> -	int npages;
> -	int size;
>  	int ret;
>  
>  	/* is this one of own objects? */
> @@ -223,21 +257,19 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  		goto fail_detach;
>  	}
>  
> -	size = dma_buf->size;
> -	npages = size / PAGE_SIZE;
> -
>  	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_unmap;
>  	}
>  
> -	ret = drm_gem_private_object_init(dev, &obj->base, size);
> +	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
>  		kfree(obj);
>  		goto fail_unmap;
>  	}
>  
> +	obj->has_dma_mapping = true;
>  	obj->sg_table = sg;
>  	obj->base.import_attach = attach;
>  
> @@ -249,3 +281,4 @@ fail_detach:
>  	dma_buf_detach(dma_buf, attach);
>  	return ERR_PTR(ret);
>  }
> +
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e6b2205..4ab0083 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -210,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  		if (ret)
>  			return ret;
>  
> -		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
> +		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> +							     reloc->offset >> PAGE_SHIFT));
>  		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
>  		kunmap_atomic(vaddr);
>  	} else {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1847731..6746109 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -167,8 +167,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
>  }
>  
>  static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> -					 struct scatterlist *sg_list,
> -					 unsigned sg_len,
> +					 const struct sg_table *pages,
>  					 unsigned first_entry,
>  					 uint32_t pte_flags)
>  {
> @@ -180,12 +179,12 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  	struct scatterlist *sg;
>  
>  	/* init sg walking */
> -	sg = sg_list;
> +	sg = pages->sgl;
>  	i = 0;
>  	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
>  	m = 0;
>  
> -	while (i < sg_len) {
> +	while (i < pages->nents) {
>  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
>  
>  		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
> @@ -194,13 +193,11 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  			pt_vaddr[j] = pte | pte_flags;
>  
>  			/* grab the next page */
> -			m++;
> -			if (m == segment_len) {
> -				sg = sg_next(sg);
> -				i++;
> -				if (i == sg_len)
> +			if (++m == segment_len) {
> +				if (++i == pages->nents)
>  					break;
>  
> +				sg = sg_next(sg);
>  				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
>  				m = 0;
>  			}
> @@ -213,44 +210,10 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
>  	}
>  }
>  
> -static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
> -				    unsigned first_entry, unsigned num_entries,
> -				    struct page **pages, uint32_t pte_flags)
> -{
> -	uint32_t *pt_vaddr, pte;
> -	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> -	unsigned last_pte, i;
> -	dma_addr_t page_addr;
> -
> -	while (num_entries) {
> -		last_pte = first_pte + num_entries;
> -		last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
> -
> -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
> -
> -		for (i = first_pte; i < last_pte; i++) {
> -			page_addr = page_to_phys(*pages);
> -			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
> -			pt_vaddr[i] = pte | pte_flags;
> -
> -			pages++;
> -		}
> -
> -		kunmap_atomic(pt_vaddr);
> -
> -		num_entries -= last_pte - first_pte;
> -		first_pte = 0;
> -		act_pd++;
> -	}
> -}
> -
>  void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  			    struct drm_i915_gem_object *obj,
>  			    enum i915_cache_level cache_level)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	uint32_t pte_flags = GEN6_PTE_VALID;
>  
>  	switch (cache_level) {

Methinks this isn't what you wanted to do.

> @@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  		BUG();
>  	}
>  
> -	if (obj->sg_table) {
> -		i915_ppgtt_insert_sg_entries(ppgtt,
> -					     obj->sg_table->sgl,
> -					     obj->sg_table->nents,
> -					     obj->gtt_space->start >> PAGE_SHIFT,
> -					     pte_flags);
> -	} else if (dev_priv->mm.gtt->needs_dmar) {
> -		BUG_ON(!obj->sg_list);
> -
> -		i915_ppgtt_insert_sg_entries(ppgtt,
> -					     obj->sg_list,
> -					     obj->num_sg,
> -					     obj->gtt_space->start >> PAGE_SHIFT,
> -					     pte_flags);
> -	} else
> -		i915_ppgtt_insert_pages(ppgtt,
> -					obj->gtt_space->start >> PAGE_SHIFT,
> -					obj->base.size >> PAGE_SHIFT,
> -					obj->pages,
> -					pte_flags);
> +	i915_ppgtt_insert_sg_entries(ppgtt,
> +				     obj->sg_table ?: obj->pages,
> +				     obj->gtt_space->start >> PAGE_SHIFT,
> +				     pte_flags);
>  }

I got lost here. Is it: if there is a prime sg_table, use that;
otherwise just use the object's sgt? If so, I think has_dma_mapping is
more readable.
Also, I wonder if ?: pissed off the clang people?

>  
>  void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> @@ -361,44 +308,26 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  
>  int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  {
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -	/* don't map imported dma buf objects */
> -	if (dev_priv->mm.gtt->needs_dmar && !obj->sg_table)
> -		return intel_gtt_map_memory(obj->pages,
> -					    obj->base.size >> PAGE_SHIFT,
> -					    &obj->sg_list,
> -					    &obj->num_sg);
> -	else
> +	if (obj->has_dma_mapping)
>  		return 0;
> +
> +	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> +			obj->pages->sgl, obj->pages->nents,
> +			PCI_DMA_BIDIRECTIONAL))
> +		return -ENOSPC;
> +
> +	return 0;
>  }
>  
>  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
>  			      enum i915_cache_level cache_level)
>  {
>  	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
>  	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
>  
> -	if (obj->sg_table) {
> -		intel_gtt_insert_sg_entries(obj->sg_table->sgl,
> -					    obj->sg_table->nents,
> -					    obj->gtt_space->start >> PAGE_SHIFT,
> -					    agp_type);
> -	} else if (dev_priv->mm.gtt->needs_dmar) {
> -		BUG_ON(!obj->sg_list);
> -
> -		intel_gtt_insert_sg_entries(obj->sg_list,
> -					    obj->num_sg,
> -					    obj->gtt_space->start >> PAGE_SHIFT,
> -					    agp_type);
> -	} else
> -		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
> -				       obj->base.size >> PAGE_SHIFT,
> -				       obj->pages,
> -				       agp_type);
> -
> +	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
> +				    obj->gtt_space->start >> PAGE_SHIFT,
> +				    agp_type);
>  	obj->has_global_gtt_mapping = 1;
>  }
>  
> @@ -418,10 +347,10 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>  
>  	interruptible = do_idling(dev_priv);
>  
> -	if (obj->sg_list) {
> -		intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
> -		obj->sg_list = NULL;
> -	}
> +	if (!obj->has_dma_mapping)
> +		dma_unmap_sg(&dev->pdev->dev,
> +			     obj->pages->sgl, obj->pages->nents,
> +			     PCI_DMA_BIDIRECTIONAL);
>  
>  	undo_idling(dev_priv, interruptible);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index b964df5..8093ecd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -470,18 +470,20 @@ i915_gem_swizzle_page(struct page *page)
>  void
>  i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  {
> +	struct scatterlist *sg;
>  	int page_count = obj->base.size >> PAGE_SHIFT;
>  	int i;
>  
>  	if (obj->bit_17 == NULL)
>  		return;
>  
> -	for (i = 0; i < page_count; i++) {
> -		char new_bit_17 = page_to_phys(obj->pages[i]) >> 17;
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +		char new_bit_17 = page_to_phys(page) >> 17;
>  		if ((new_bit_17 & 0x1) !=
>  		    (test_bit(i, obj->bit_17) != 0)) {
> -			i915_gem_swizzle_page(obj->pages[i]);
> -			set_page_dirty(obj->pages[i]);
> +			i915_gem_swizzle_page(page);
> +			set_page_dirty(page);
>  		}
>  	}
>  }
> @@ -489,6 +491,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  void
>  i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  {
> +	struct scatterlist *sg;
>  	int page_count = obj->base.size >> PAGE_SHIFT;
>  	int i;
>  
> @@ -502,8 +505,9 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>  		}
>  	}
>  
> -	for (i = 0; i < page_count; i++) {
> -		if (page_to_phys(obj->pages[i]) & (1 << 17))
> +	for_each_sg(obj->pages->sgl, sg, page_count, i) {
> +		struct page *page = sg_page(sg);
> +		if (page_to_phys(page) & (1 << 17))
>  			__set_bit(i, obj->bit_17);
>  		else
>  			__clear_bit(i, obj->bit_17);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d601013..dd49046 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -888,20 +888,20 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  			 struct drm_i915_gem_object *src)
>  {
>  	struct drm_i915_error_object *dst;
> -	int page, page_count;
> +	int i, count;
>  	u32 reloc_offset;
>  
>  	if (src == NULL || src->pages == NULL)
>  		return NULL;
>  
> -	page_count = src->base.size / PAGE_SIZE;
> +	count = src->base.size / PAGE_SIZE;
>  
> -	dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
> +	dst = kmalloc(sizeof(*dst) + count * sizeof(u32 *), GFP_ATOMIC);
>  	if (dst == NULL)
>  		return NULL;
>  
>  	reloc_offset = src->gtt_offset;
> -	for (page = 0; page < page_count; page++) {
> +	for (i = 0; i < count; i++) {
>  		unsigned long flags;
>  		void *d;
>  
> @@ -924,30 +924,33 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
>  		} else {
> +			struct page *page;
>  			void *s;
>  
> -			drm_clflush_pages(&src->pages[page], 1);
> +			page = i915_gem_object_get_page(src, i);
> +
> +			drm_clflush_pages(&page, 1);
>  
> -			s = kmap_atomic(src->pages[page]);
> +			s = kmap_atomic(page);
>  			memcpy(d, s, PAGE_SIZE);
>  			kunmap_atomic(s);
>  
> -			drm_clflush_pages(&src->pages[page], 1);
> +			drm_clflush_pages(&page, 1);
>  		}
>  		local_irq_restore(flags);
>  
> -		dst->pages[page] = d;
> +		dst->pages[i] = d;
>  
>  		reloc_offset += PAGE_SIZE;
>  	}
> -	dst->page_count = page_count;
> +	dst->page_count = count;
>  	dst->gtt_offset = src->gtt_offset;
>  
>  	return dst;
>  
>  unwind:
> -	while (page--)
> -		kfree(dst->pages[page]);
> +	while (i--)
> +		kfree(dst->pages[i]);
>  	kfree(dst);
>  	return NULL;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 55cdb4d..984a0c5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -464,7 +464,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
>  		goto err_unref;
>  
>  	pc->gtt_offset = obj->gtt_offset;
> -	pc->cpu_page =  kmap(obj->pages[0]);
> +	pc->cpu_page =  kmap(sg_page(obj->pages->sgl));
>  	if (pc->cpu_page == NULL)
>  		goto err_unpin;
>  
> @@ -491,7 +491,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
>  		return;
>  
>  	obj = pc->obj;
> -	kunmap(obj->pages[0]);
> +
> +	kunmap(sg_page(obj->pages->sgl));
>  	i915_gem_object_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  
> @@ -1026,7 +1027,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
>  	if (obj == NULL)
>  		return;
>  
> -	kunmap(obj->pages[0]);
> +	kunmap(sg_page(obj->pages->sgl));
>  	i915_gem_object_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
>  	ring->status_page.obj = NULL;
> @@ -1053,7 +1054,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
>  	}
>  
>  	ring->status_page.gfx_addr = obj->gtt_offset;
> -	ring->status_page.page_addr = kmap(obj->pages[0]);
> +	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
>  	if (ring->status_page.page_addr == NULL) {
>  		ret = -ENOMEM;
>  		goto err_unpin;
> diff --git a/include/drm/drmP.h b/include/drm/drmP.h
> index d6b67bb..d5f0c16 100644
> --- a/include/drm/drmP.h
> +++ b/include/drm/drmP.h
> @@ -1367,6 +1367,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
>  
>  /* Cache management (drm_cache.c) */
>  void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
> +void drm_clflush_sg(struct sg_table *st);
>  void drm_clflush_virt_range(char *addr, unsigned long length);
>  
>  				/* Locking IOCTL support (drm_lock.h) */
> diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
> index 8e29d55..2e37e9f 100644
> --- a/include/drm/intel-gtt.h
> +++ b/include/drm/intel-gtt.h
> @@ -30,16 +30,10 @@ void intel_gmch_remove(void);
>  bool intel_enable_gtt(void);
>  
>  void intel_gtt_chipset_flush(void);
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg);
> -void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> -			 struct scatterlist **sg_list, int *num_sg);
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> -				 unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
>  				 unsigned int pg_start,
>  				 unsigned int flags);
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> -			    struct page **pages, unsigned int flags);
> +void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
>  
>  /* Special gtt memory types */
>  #define AGP_DCACHE_MEMORY	1

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist
  2012-09-07  1:49   ` Ben Widawsky
@ 2012-09-10 16:34     ` Chris Wilson
  2012-09-12 13:33       ` Daniel Vetter
  0 siblings, 1 reply; 55+ messages in thread
From: Chris Wilson @ 2012-09-10 16:34 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Thu, 6 Sep 2012 18:49:24 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Tue,  4 Sep 2012 21:02:57 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > Rather than have multiple data structures for describing our page layout
> > in conjunction with the array of pages, we can migrate all users over to
> > a scatterlist.
> > 
> > One major advantage this offers, other than unifying the page tracking
> > structures, is that we replace the vmalloc'ed array (which can be up to
> > a megabyte in size) with a chain of individual pages which helps reduce
> > memory pressure.
> > 
> > The disadvantage is that we then do not have a simple array to iterate,
> > or to access randomly. The common case for this is in the relocation
> > processing, which will typically fit within a single scatterlist page
> > and so be almost the same cost as the simple array. For iterating over
> > the array, the extra function call could be optimised away, but in
> > reality is an insignificant cost of either binding the pages, or
> > performing the pwrite/pread.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> 
> Now that my eyes are done bleeding, easy ones:
> 
> ERROR: space required after that ',' (ctx:VxV)
> #69: FILE: drivers/char/agp/intel-gtt.c:99:
> +	for_each_sg(st->sgl, sg, num_entries,i)
>  	                                    ^
> 
> WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
> #189: FILE: drivers/gpu/drm/drm_cache.c:117:
> +		printk(KERN_ERR "Timed out waiting for cache flush.\n");
> 
> WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
> #191: FILE: drivers/gpu/drm/drm_cache.c:119:
> +	printk(KERN_ERR "Architecture has no drm_cache.c support\n");

Hmm, the drm_cache one is tricky as it is a continuation of the
existing style of the file, so it is probably best kept as-is and the
whole file then fixed to follow the new conventions.

> In addition to the inline comments, it would have been even slightly
> easier to review without the s/page/i rename, which seems to have no
> compelling reason anyway.

It was motivated by using the common idiom for for_each_sg() and by
allowing 'struct page *page' to be the natural local variable within
the loop. So I think the end result justifies the small amount of extra
churn in the patch.
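
For reference, the idiom in question (an illustrative sketch, not a
hunk from the patch):

	struct scatterlist *sg;
	int i;

	for_each_sg(st->sgl, sg, st->nents, i) {
		struct page *page = sg_page(sg);

		/* 'page' is the natural loop-local variable here */
	}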

> >  	if (intel_private.base.needs_dmar) {
> > -		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
> > -					   &mem->sg_list, &mem->num_sg);
> > +		struct sg_table st;
> > +
> > +		ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
> >  		if (ret != 0)
> >  			return ret;
> >  
> > -		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
> > -					    pg_start, type);
> > +		intel_gtt_insert_sg_entries(&st, pg_start, type);
> > +		mem->sg_list = st.sgl;
> > +		mem->num_sg = st.nents;
> 
> Can you explain how the corresponding free for the sg_table gets called
> here?

The sg_table is just a small placeholder that is reconstructed in
intel_gtt_unmap_memory() for sg_free_table().
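
Roughly, the unmap side then looks like this (sketch, not the literal
patch; it just fills the placeholder fields back in before freeing):

	static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
	{
		struct sg_table st;

		pci_unmap_sg(intel_private.pcidev, sg_list,
			     num_sg, PCI_DMA_BIDIRECTIONAL);

		st.sgl = sg_list;
		st.orig_nents = st.nents = num_sg;
		sg_free_table(&st);
	}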

> > @@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >  	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> >  	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> >  
> > -	/* Get the list of pages out of our struct file.  They'll be pinned
> > -	 * at this point until we release them.
> > -	 */
> > +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +	if (st == NULL)
> > +		return -ENOMEM;
> > +
> >  	page_count = obj->base.size / PAGE_SIZE;
> > -	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> > -	if (obj->pages == NULL)
> > +	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> > +		sg_free_table(st);
> > +		kfree(st);
> >  		return -ENOMEM;
> > +	}
> 
> I think the call here to sg_free_table is bogus.

Experience says otherwise ;-)

The reason is that the sg_alloc_table chains together its individual
page allocations but doesn't perform any unwind if one fails before
reporting the error. sg_free_table() does the right thing in those
circumstances.
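
In code terms, the error path being defended (a sketch of the failure
mode described above, with comments added):

	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		/* a later chunk allocation can fail after earlier
		 * chunks were already chained from st->sgl, so unwind
		 * with sg_free_table(), which frees whatever part of
		 * the chain exists, before freeing st itself
		 */
		sg_free_table(st);
		kfree(st);
		return -ENOMEM;
	}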

> > -	/* link the pages into an SG then map the sg */
> > -	sg = drm_prime_pages_to_sg(obj->pages, npages);
> > -	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
> >  	i915_gem_object_pin_pages(obj);
> 
> <bikeshed>
> I think the right way to go about this is to add drm_prime_pages_to_st
> since you're pushing the whole sg_table-over-scatterlist thing; other
> drivers can leverage it.
> </bikeshed>

Quite possibly true, but the code will change later and lose some of its
generality. Or at least no one else is like i915 yet.
 
> The lifetime description we discussed on IRC would have helped here as
> well.

> >  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> > @@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> >  {
> >  	struct drm_i915_gem_object *obj = dma_buf->priv;
> >  	struct drm_device *dev = obj->base.dev;
> > -	int ret;
> > +	struct scatterlist *sg;
> > +	struct page **pages;
> > +	int ret, i;
> >  
> >  	ret = i915_mutex_lock_interruptible(dev);
> >  	if (ret)
> > @@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> >  	}
> >  
> >  	ret = i915_gem_object_get_pages(obj);
> > -	if (ret) {
> > -		mutex_unlock(&dev->struct_mutex);
> > -		return ERR_PTR(ret);
> > -	}
> > +	if (ret)
> > +		goto error;
> >  
> > -	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
> > -	if (!obj->dma_buf_vmapping) {
> > -		DRM_ERROR("failed to vmap object\n");
> > -		goto out_unlock;
> > -	}
> > +	ret = -ENOMEM;
> > +
> > +	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
> > +	if (pages == NULL)
> > +		goto error;
> > +
> > +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
> > +		pages[i] = sg_page(sg);
> > +
> > +	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
> > +	drm_free_large(pages);
> > +
> > +	if (!obj->dma_buf_vmapping)
> > +		goto error;
> >  
> >  	obj->vmapping_count = 1;
> >  	i915_gem_object_pin_pages(obj);
> >  out_unlock:
> >  	mutex_unlock(&dev->struct_mutex);
> >  	return obj->dma_buf_vmapping;
> > +
> > +error:
> > +	mutex_unlock(&dev->struct_mutex);
> > +	return ERR_PTR(ret);
> >  }
> >  
> >  static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
> 
> The return on vmap failing looks incorrect to me here. Also, I think
> leaving the DRM_ERROR would have been nice.

Since we already return the ERR_PTR(-ENOMEM) we are not breaking any
semantics by reporting the oom for vmap as well. And yes it would be
nice if vmap gave a specific error as well. So other than the change to
an explicit errno, I'm not sure what mistake you are pointing out.

In this case, instead of the DRM_ERROR, userspace gets an obvious
errno, which is much more informative.
> 
> > @@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> >  		BUG();
> >  	}
> >  
> > -	if (obj->sg_table) {
> > -		i915_ppgtt_insert_sg_entries(ppgtt,
> > -					     obj->sg_table->sgl,
> > -					     obj->sg_table->nents,
> > -					     obj->gtt_space->start >> PAGE_SHIFT,
> > -					     pte_flags);
> > -	} else if (dev_priv->mm.gtt->needs_dmar) {
> > -		BUG_ON(!obj->sg_list);
> > -
> > -		i915_ppgtt_insert_sg_entries(ppgtt,
> > -					     obj->sg_list,
> > -					     obj->num_sg,
> > -					     obj->gtt_space->start >> PAGE_SHIFT,
> > -					     pte_flags);
> > -	} else
> > -		i915_ppgtt_insert_pages(ppgtt,
> > -					obj->gtt_space->start >> PAGE_SHIFT,
> > -					obj->base.size >> PAGE_SHIFT,
> > -					obj->pages,
> > -					pte_flags);
> > +	i915_ppgtt_insert_sg_entries(ppgtt,
> > +				     obj->sg_table ?: obj->pages,
> > +				     obj->gtt_space->start >> PAGE_SHIFT,
> > +				     pte_flags);
> >  }
> 
> I got lost here. Is it: if there is a prime sg_table, use that;
> otherwise just use the object's sgt? If so, I think has_dma_mapping is
> more readable.
> Also, I wonder if ?: pissed off the clang people?

Right, this is just a step along the path to enlightenment. 2 out of
the 3 paths now use obj->pages, with the dmabuf being the only exception
that still creates an obj->sg_table scatterlist. '?:' is widely used by
the kernel; if clang doesn't yet support it, that's their problem. But
rest assured it is removed in a couple of patches after migrating dmabuf
over to the page ops.
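
For clarity, the construct in question (illustrative sketch):

	/* GNU '?:' extension, equivalent to
	 *   obj->sg_table ? obj->sg_table : obj->pages
	 * i.e. prefer the imported dmabuf scatterlist when present,
	 * otherwise fall back to the object's own pages
	 */
	struct sg_table *pages = obj->sg_table ?: obj->pages;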
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/24] drm/i915: Pin backing pages for pwrite
  2012-09-07  0:07   ` Ben Widawsky
@ 2012-09-12 13:13     ` Daniel Vetter
  2012-09-12 13:20       ` Daniel Vetter
  0 siblings, 1 reply; 55+ messages in thread
From: Daniel Vetter @ 2012-09-12 13:13 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Thu, Sep 06, 2012 at 05:07:58PM -0700, Ben Widawsky wrote:
> On Tue,  4 Sep 2012 21:02:55 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > By using the recently introduced pinning of pages, we can safely drop
> > the mutex in the knowledge that the pages are not going to disappear
> > beneath us, and so we can simplify the code for iterating over the pages.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c |   37 +++++++++++++------------------------
> >  1 file changed, 13 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index aa088ef..8a4eac0 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -690,7 +690,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> >  				       page_length);
> >  	kunmap_atomic(vaddr);
> >  
> > -	return ret;
> > +	return ret ? -EFAULT : 0;
> >  }
> >  
> >  /* Only difference to the fast-path function is that this can handle bit17
> > @@ -724,7 +724,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> >  					     page_do_bit17_swizzling);
> >  	kunmap(page);
> >  
> > -	return ret;
> > +	return ret ? -EFAULT : 0;
> >  }
> >  
> >  static int
> > @@ -733,7 +733,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  		      struct drm_i915_gem_pwrite *args,
> >  		      struct drm_file *file)
> >  {
> > -	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
> >  	ssize_t remain;
> >  	loff_t offset;
> >  	char __user *user_data;
> 
> Without digging too deep to see if you looked already: it would be nice
> if we could get a DRM_INFO or something for cases where the return isn't
> actually EFAULT.

If I understand your question correctly, the answer is that ret is never
-EFAULT; the copy functions return the amount of uncopied data in bytes.
This simply aligns the return value with our usual -errno stuff. Since
these two functions are not pure wrappers around the copy helpers, I
agree that -errno is a better fit for the return semantics.
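
Condensed, the new wrapper shape is (a sketch; the exact copy helper
differs between the fast and slow paths):

	char *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(page);
	unwritten = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					      user_data, page_length);
	kunmap_atomic(vaddr);

	/* the copy helpers return the number of bytes *not* copied;
	 * convert that into the usual -errno convention
	 */
	return unwritten ? -EFAULT : 0;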

> 
> > @@ -742,7 +741,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  	int hit_slowpath = 0;
> >  	int needs_clflush_after = 0;
> >  	int needs_clflush_before = 0;
> > -	int release_page;
> >  
> >  	user_data = (char __user *) (uintptr_t) args->data_ptr;
> >  	remain = args->size;
> > @@ -768,6 +766,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  	    && obj->cache_level == I915_CACHE_NONE)
> >  		needs_clflush_before = 1;
> >  
> > +	ret = i915_gem_object_get_pages(obj);
> > +	if (ret)
> > +		return ret;
> > +
> > +	i915_gem_object_pin_pages(obj);
> > +
> >  	offset = args->offset;
> >  	obj->dirty = 1;
> >  
> > @@ -793,18 +797,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  			((shmem_page_offset | page_length)
> >  				& (boot_cpu_data.x86_clflush_size - 1));
> >  
> > -		if (obj->pages) {
> > -			page = obj->pages[offset >> PAGE_SHIFT];
> > -			release_page = 0;
> > -		} else {
> > -			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
> > -			if (IS_ERR(page)) {
> > -				ret = PTR_ERR(page);
> > -				goto out;
> > -			}
> > -			release_page = 1;
> > -		}
> > -
> > +		page = obj->pages[offset >> PAGE_SHIFT];
> >  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> >  			(page_to_phys(page) & (1 << 17)) != 0;
> >  
> 
> So the obvious question is what about the page caching? Can you add to
> the commit message for my edification why previously the shmem page was
> released from the page cache and now it isn't?

The really old code simply held onto dev->struct_mutex to guarantee that
the pages (in obj->pages) won't disappear. My pwrite/pread rework drops
the lock in the slowpath (to avoid deadlocking with our own pagefault
handler), so I needed to manually grab a reference to the page to avoid it
disappearing (and then also drop that ref again).

Chris' new code uses the new pages_pin stuff to ensure that the backing
storage doesn't vanish, so we can reap this complexity.
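
The resulting pattern (sketch, condensed from the hunks quoted in this
mail):

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);	/* backing pages cannot vanish now */
	...
	mutex_unlock(&dev->struct_mutex);	/* slow path, safe to drop the lock */
	ret = shmem_pwrite_slow(page, ...);
	mutex_lock(&dev->struct_mutex);
	...
	i915_gem_object_unpin_pages(obj);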
-Daniel

> 
> > @@ -816,26 +809,20 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> >  			goto next_page;
> >  
> >  		hit_slowpath = 1;
> > -		page_cache_get(page);
> >  		mutex_unlock(&dev->struct_mutex);
> > -
> >  		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> >  					user_data, page_do_bit17_swizzling,
> >  					partial_cacheline_write,
> >  					needs_clflush_after);
> >  
> >  		mutex_lock(&dev->struct_mutex);
> > -		page_cache_release(page);
> > +
> >  next_page:
> >  		set_page_dirty(page);
> >  		mark_page_accessed(page);
> > -		if (release_page)
> > -			page_cache_release(page);
> >  
> > -		if (ret) {
> > -			ret = -EFAULT;
> > +		if (ret)
> >  			goto out;
> > -		}
> >  
> >  		remain -= page_length;
> >  		user_data += page_length;
> > @@ -843,6 +830,8 @@ next_page:
> >  	}
> >  
> >  out:
> > +	i915_gem_object_unpin_pages(obj);
> > +
> >  	if (hit_slowpath) {
> >  		/* Fixup: Kill any reinstated backing storage pages */
> >  		if (obj->madv == __I915_MADV_PURGED)
> 
> 
> 
> -- 
> Ben Widawsky, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/24] drm/i915: Pin backing pages for pwrite
  2012-09-12 13:13     ` Daniel Vetter
@ 2012-09-12 13:20       ` Daniel Vetter
  0 siblings, 0 replies; 55+ messages in thread
From: Daniel Vetter @ 2012-09-12 13:20 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Wed, Sep 12, 2012 at 03:13:27PM +0200, Daniel Vetter wrote:
> On Thu, Sep 06, 2012 at 05:07:58PM -0700, Ben Widawsky wrote:
> > On Tue,  4 Sep 2012 21:02:55 +0100
> > > @@ -742,7 +741,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> > >  	int hit_slowpath = 0;
> > >  	int needs_clflush_after = 0;
> > >  	int needs_clflush_before = 0;
> > > -	int release_page;
> > >  
> > >  	user_data = (char __user *) (uintptr_t) args->data_ptr;
> > >  	remain = args->size;
> > > @@ -768,6 +766,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> > >  	    && obj->cache_level == I915_CACHE_NONE)
> > >  		needs_clflush_before = 1;
> > >  
> > > +	ret = i915_gem_object_get_pages(obj);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	i915_gem_object_pin_pages(obj);
> > > +
> > >  	offset = args->offset;
> > >  	obj->dirty = 1;
> > >  
> > > @@ -793,18 +797,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> > >  			((shmem_page_offset | page_length)
> > >  				& (boot_cpu_data.x86_clflush_size - 1));
> > >  
> > > -		if (obj->pages) {
> > > -			page = obj->pages[offset >> PAGE_SHIFT];
> > > -			release_page = 0;
> > > -		} else {
> > > -			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
> > > -			if (IS_ERR(page)) {
> > > -				ret = PTR_ERR(page);
> > > -				goto out;
> > > -			}
> > > -			release_page = 1;
> > > -		}
> > > -
> > > +		page = obj->pages[offset >> PAGE_SHIFT];
> > >  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> > >  			(page_to_phys(page) & (1 << 17)) != 0;
> > >  
> > 
> > So the obvious question is what about the page caching? Can you add to
> > the commit message for my edification why previously the shmem page was
> > released from the page cache and now it isn't?
> 
> The really old code simply held onto dev->struct_mutex to guarantee that
> the pages (in obj->pages) won't disappear. My pwrite/pread rework drops
> the lock in the slowpath (to avoid deadlocking with our own pagefault
> handler), so I needed to manually grab a reference to the page to avoid it
> disappearing (and then also drop that ref again).
> 
> Chris' new code uses the new pages_pin stuff to ensure that the backing
> storage doesn't vanish, so we can reap this complexity.

I guess I've misunderstood your question: The current code either uses the
obj->pages page array or grabs the page from the backing storage. The
latter gives you a page with a reference. But since the obj->pages array
can disappear when we drop dev->struct_mutex, we need to manually hold a
reference. Since it's a slow-path I didn't bother distinguishing whether
we've got the page from obj->pages (where grabbing a ref while dropping
the lock is required) or from shmem_read_mapping_page (where we already
hold a ref), and simply grabbed an additional ref unconditionally.
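
Schematically, the code path that got dropped (sketch, condensed from
the patch under discussion):

	page_cache_get(page);	/* extra ref, taken unconditionally */
	mutex_unlock(&dev->struct_mutex);

	ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
				user_data, page_do_bit17_swizzling,
				partial_cacheline_write,
				needs_clflush_after);

	mutex_lock(&dev->struct_mutex);
	page_cache_release(page);	/* and dropped again */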
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist
  2012-09-10 16:34     ` Chris Wilson
@ 2012-09-12 13:33       ` Daniel Vetter
  0 siblings, 0 replies; 55+ messages in thread
From: Daniel Vetter @ 2012-09-12 13:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Ben Widawsky, intel-gfx

On Mon, Sep 10, 2012 at 05:34:48PM +0100, Chris Wilson wrote:
> On Thu, 6 Sep 2012 18:49:24 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Tue,  4 Sep 2012 21:02:57 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > Rather than have multiple data structures for describing our page layout
> > > in conjunction with the array of pages, we can migrate all users over to
> > > a scatterlist.
> > > 
> > > One major advantage this offers, other than unifying the page tracking
> > > structures, is that we replace the vmalloc'ed array (which can be up to
> > > a megabyte in size) with a chain of individual pages which helps reduce
> > > memory pressure.
> > > 
> > > The disadvantage is that we then do not have a simple array to iterate,
> > > or to access randomly. The common case for this is in the relocation
> > > processing, which will typically fit within a single scatterlist page
> > > and so be almost the same cost as the simple array. For iterating over
> > > the array, the extra function call could be optimised away, but in
> > > reality is an insignificant cost of either binding the pages, or
> > > performing the pwrite/pread.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Ok, I agree with Chris' comments here and slurped in patches 1-5.

Thanks, Daniel

> > 
> > 
> > Now that my eyes are done bleeding, easy ones:
> > 
> > ERROR: space required after that ',' (ctx:VxV)
> > #69: FILE: drivers/char/agp/intel-gtt.c:99:
> > +	for_each_sg(st->sgl, sg, num_entries,i)
> >  	                                    ^
> > 
> > WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
> > #189: FILE: drivers/gpu/drm/drm_cache.c:117:
> > +		printk(KERN_ERR "Timed out waiting for cache flush.\n");
> > 
> > WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
> > #191: FILE: drivers/gpu/drm/drm_cache.c:119:
> > +	printk(KERN_ERR "Architecture has no drm_cache.c support\n");
> 
> Hmm, the drm_cache one is tricky as it is a continuation of the
> existing style of the file, so it is probably best kept as-is and the
> whole file then fixed to follow the new conventions.
> 
> > In addition to the inline comments, it would have been even slightly
> > easier to review without the s/page/i rename, which seems to have no
> > compelling reason anyway.
> 
> It was motivated by using the common idiom for for_each_sg() and by
> allowing 'struct page *page' to be the natural local variable within
> the loop. So I think the end result justifies the small amount of extra
> churn in the patch.
> 
> > >  	if (intel_private.base.needs_dmar) {
> > > -		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
> > > -					   &mem->sg_list, &mem->num_sg);
> > > +		struct sg_table st;
> > > +
> > > +		ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
> > >  		if (ret != 0)
> > >  			return ret;
> > >  
> > > -		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
> > > -					    pg_start, type);
> > > +		intel_gtt_insert_sg_entries(&st, pg_start, type);
> > > +		mem->sg_list = st.sgl;
> > > +		mem->num_sg = st.nents;
> > 
> > Can you explain how the corresponding free for the sg_table gets called
> > here?
> 
> The sg_table is just a small placeholder that is reconstructed in
> intel_gtt_unmap_memory() for sg_free_table().
> 
> > > @@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> > >  	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> > >  	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> > >  
> > > -	/* Get the list of pages out of our struct file.  They'll be pinned
> > > -	 * at this point until we release them.
> > > -	 */
> > > +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> > > +	if (st == NULL)
> > > +		return -ENOMEM;
> > > +
> > >  	page_count = obj->base.size / PAGE_SIZE;
> > > -	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> > > -	if (obj->pages == NULL)
> > > +	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> > > +		sg_free_table(st);
> > > +		kfree(st);
> > >  		return -ENOMEM;
> > > +	}
> > 
> > I think the call here to sg_free_table is bogus.
> 
> Experience says otherwise ;-)
> 
> The reason is that the sg_alloc_table chains together its individual
> page allocations but doesn't perform any unwind if one fails before
> reporting the error. sg_free_table() does the right thing in those
> circumstances.
> 
> > > -	/* link the pages into an SG then map the sg */
> > > -	sg = drm_prime_pages_to_sg(obj->pages, npages);
> > > -	nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
> > >  	i915_gem_object_pin_pages(obj);
> > 
> > <bikeshed>
> > I think the right way to go about this is to add drm_prime_pages_to_st
> > since you're pushing the whole sg_table-over-scatterlist thing; other
> > drivers can leverage it.
> > </bikeshed>
> 
> Quite possibly true, but the code will change later and lose some of its
> generality. Or at least no one else is like i915 yet.
>  
> > The lifetime description we discussed on IRC would have helped here as
> > well.
> 
> > >  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> > > @@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> > >  {
> > >  	struct drm_i915_gem_object *obj = dma_buf->priv;
> > >  	struct drm_device *dev = obj->base.dev;
> > > -	int ret;
> > > +	struct scatterlist *sg;
> > > +	struct page **pages;
> > > +	int ret, i;
> > >  
> > >  	ret = i915_mutex_lock_interruptible(dev);
> > >  	if (ret)
> > > @@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> > >  	}
> > >  
> > >  	ret = i915_gem_object_get_pages(obj);
> > > -	if (ret) {
> > > -		mutex_unlock(&dev->struct_mutex);
> > > -		return ERR_PTR(ret);
> > > -	}
> > > +	if (ret)
> > > +		goto error;
> > >  
> > > -	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
> > > -	if (!obj->dma_buf_vmapping) {
> > > -		DRM_ERROR("failed to vmap object\n");
> > > -		goto out_unlock;
> > > -	}
> > > +	ret = -ENOMEM;
> > > +
> > > +	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
> > > +	if (pages == NULL)
> > > +		goto error;
> > > +
> > > +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
> > > +		pages[i] = sg_page(sg);
> > > +
> > > +	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
> > > +	drm_free_large(pages);
> > > +
> > > +	if (!obj->dma_buf_vmapping)
> > > +		goto error;
> > >  
> > >  	obj->vmapping_count = 1;
> > >  	i915_gem_object_pin_pages(obj);
> > >  out_unlock:
> > >  	mutex_unlock(&dev->struct_mutex);
> > >  	return obj->dma_buf_vmapping;
> > > +
> > > +error:
> > > +	mutex_unlock(&dev->struct_mutex);
> > > +	return ERR_PTR(ret);
> > >  }
> > >  
> > >  static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
> > 
> > The return on vmap failing looks incorrect to me here. Also, I think
> > leaving the DRM_ERROR would have been nice.
> 
> Since we already return the ERR_PTR(-ENOMEM) we are not breaking any
> semantics by reporting the oom for vmap as well. And yes it would be
> nice if vmap gave a specific error as well. So other than the change to
> an explicit errno, I'm not sure what mistake you are pointing out.
> 
> In this case, rather than a DRM_ERROR, there is an obvious errno returned
> to userspace, which is much more informative.
> > 
> > > @@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> > >  		BUG();
> > >  	}
> > >  
> > > -	if (obj->sg_table) {
> > > -		i915_ppgtt_insert_sg_entries(ppgtt,
> > > -					     obj->sg_table->sgl,
> > > -					     obj->sg_table->nents,
> > > -					     obj->gtt_space->start >> PAGE_SHIFT,
> > > -					     pte_flags);
> > > -	} else if (dev_priv->mm.gtt->needs_dmar) {
> > > -		BUG_ON(!obj->sg_list);
> > > -
> > > -		i915_ppgtt_insert_sg_entries(ppgtt,
> > > -					     obj->sg_list,
> > > -					     obj->num_sg,
> > > -					     obj->gtt_space->start >> PAGE_SHIFT,
> > > -					     pte_flags);
> > > -	} else
> > > -		i915_ppgtt_insert_pages(ppgtt,
> > > -					obj->gtt_space->start >> PAGE_SHIFT,
> > > -					obj->base.size >> PAGE_SHIFT,
> > > -					obj->pages,
> > > -					pte_flags);
> > > +	i915_ppgtt_insert_sg_entries(ppgtt,
> > > +				     obj->sg_table ?: obj->pages,
> > > +				     obj->gtt_space->start >> PAGE_SHIFT,
> > > +				     pte_flags);
> > >  }
> > 
> > I got lost here. Is it: if there is a prime sg_table, use that; otherwise
> > just use the object's sgt? If so, I think has_dma_mapping is more
> > readable.
> > Also, I wonder if ?: pissed off the clang people?
> 
> Right, this is just a step along the path to enlightenment. Two of the
> three paths now use obj->pages, with the dmabuf being the only exception
> that still creates an obj->sg_table scatterlist. '?:' (the GNU shorthand
> for 'a ? a : b' with 'a' evaluated only once) is widely used in the
> kernel; if clang doesn't yet support it, that's their problem. But rest
> assured it is removed a couple of patches later, after migrating dmabuf
> over to the page ops.
> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 07/24] drm: Introduce drm_mm_create_block()
  2012-09-04 20:02 ` [PATCH 07/24] drm: Introduce drm_mm_create_block() Chris Wilson
@ 2012-09-12 13:43   ` Daniel Vetter
  0 siblings, 0 replies; 55+ messages in thread
From: Daniel Vetter @ 2012-09-12 13:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Dave Airlie, intel-gfx

On Tue, Sep 04, 2012 at 09:02:59PM +0100, Chris Wilson wrote:
> To be used later by i915 to preallocate exact blocks of space from the
> range manager.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Airlie <airlied@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
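
For reference, the intended caller later in this series claims the
BIOS-preallocated ranges along these lines (a sketch only; obj here is
the preallocated stolen object set up in the later patches):

	/* Reserve the exact range the BIOS already handed to the GPU */
	obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
					     obj->gtt_offset,
					     obj->base.size,
					     false);
	if (obj->gtt_space == NULL)
		DRM_DEBUG_KMS("failed to reserve preallocated range\n");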

> ---
>  drivers/gpu/drm/drm_mm.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++
>  include/drm/drm_mm.h     |    4 ++++
>  2 files changed, 53 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 9bb82f7..5db8c20 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -161,6 +161,55 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>  	}
>  }
>  
> +struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> +					unsigned long start,
> +					unsigned long size,
> +					bool atomic)
> +{
> +	struct drm_mm_node *hole, *node;
> +	unsigned long end = start + size;
> +
> +	list_for_each_entry(hole, &mm->hole_stack, hole_stack) {
> +		unsigned long hole_start;
> +		unsigned long hole_end;
> +
> +		BUG_ON(!hole->hole_follows);
> +		hole_start = drm_mm_hole_node_start(hole);
> +		hole_end = drm_mm_hole_node_end(hole);
> +
> +		if (hole_start > start || hole_end < end)
> +			continue;
> +
> +		node = drm_mm_kmalloc(mm, atomic);
> +		if (unlikely(node == NULL))
> +			return NULL;
> +
> +		node->start = start;
> +		node->size = size;
> +		node->mm = mm;
> +		node->allocated = 1;
> +
> +		INIT_LIST_HEAD(&node->hole_stack);
> +		list_add(&node->node_list, &hole->node_list);
> +
> +		if (start == hole_start) {
> +			hole->hole_follows = 0;
> +			list_del_init(&hole->hole_stack);
> +		}
> +
> +		node->hole_follows = 0;
> +		if (end != hole_end) {
> +			list_add(&node->hole_stack, &mm->hole_stack);
> +			node->hole_follows = 1;
> +		}
> +
> +		return node;
> +	}
> +
> +	return NULL;
> +}
> +EXPORT_SYMBOL(drm_mm_create_block);
> +
>  struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *hole_node,
>  					     unsigned long size,
>  					     unsigned alignment,
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 06d7f79..4020f96 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -102,6 +102,10 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
>  /*
>   * Basic range manager support (drm_mm.c)
>   */
> +extern struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
> +					       unsigned long start,
> +					       unsigned long size,
> +					       bool atomic);
>  extern struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
>  						    unsigned long size,
>  						    unsigned alignment,
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager
  2012-09-04 20:03 ` [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
@ 2012-09-12 13:54   ` Daniel Vetter
  0 siblings, 0 replies; 55+ messages in thread
From: Daniel Vetter @ 2012-09-12 13:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Dave Airlie, intel-gfx

On Tue, Sep 04, 2012 at 09:03:03PM +0100, Chris Wilson wrote:
> This will be used by i915 in forthcoming patches in order to measure the
> largest contiguous chunk of memory available for enabling chipset
> features.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Airlie <airlied@redhat.com>

One nitpick below.

> @@ -99,6 +112,19 @@ static inline bool drm_mm_initialized(struct drm_mm *mm)
>  	     entry != NULL; entry = next, \
>  		next = entry ? list_entry(entry->node_list.next, \
>  			struct drm_mm_node, node_list) : NULL) \
> +
> +/* Note that we need to unroll list_for_each_entry in order to inline
> + * setting hole_start and hole_end on each iteration and keep the
> + * macro sane.
> + */
> +#define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
> +	for (entry = list_entry((mm)->hole_stack.next, typeof(struct drm_mm_node), hole_stack); \
> +	     &entry->hole_stack != &(mm)->hole_stack ? \
> +	     hole_start = drm_mm_hole_node_start(entry), \
> +	     hole_end = drm_mm_hole_node_end(entry) : \
> +	     0; \
> +	     entry = list_entry(entry->hole_stack.next, typeof(struct drm_mm_node), hole_stack))

Minor bikeshed for the macro:
- typeof(struct drm_mm_node) is a bit redundant
- I'd add a , 1 to the conditional check to not implicitly rely on
  drm_mm_hole_node_end != 0 for the correctness of this macro.
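
Roughly, i.e. with both nitpicks applied the macro would read something
like this (a sketch only, not tested):

	#define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
		for (entry = list_entry((mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
		     &entry->hole_stack != &(mm)->hole_stack ? \
		     hole_start = drm_mm_hole_node_start(entry), \
		     hole_end = drm_mm_hole_node_end(entry), \
		     1 : 0; \
		     entry = list_entry(entry->hole_stack.next, struct drm_mm_node, hole_stack))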

With that this is:
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

To follow due process, can you please resubmit these two drm_mm patches to
dri-devel, so that I can properly bugger Dave for his maintainer-ack?

Thanks, Daniel

> +
>  /*
>   * Basic range manager support (drm_mm.c)
>   */
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops
  2012-09-04 20:02 ` [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops Chris Wilson
@ 2012-09-14 18:02   ` Ben Widawsky
  2012-09-14 18:24     ` Chris Wilson
  2012-09-14 21:43   ` Daniel Vetter
  1 sibling, 1 reply; 55+ messages in thread
From: Ben Widawsky @ 2012-09-14 18:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:58 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> By providing a callback for when we need to bind the pages, and then
> release them again later, we can shorten the amount of time we hold the
> foreign pages mapped and pinned, and importantly the dmabuf objects then
> behave as any other normal object with respect to the shrinker and
> memory management.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
with nitpicks below:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    1 -
>  drivers/gpu/drm/i915/i915_gem.c        |   10 ++++----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |   44 ++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c    |    4 +--
>  4 files changed, 37 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1a714fa..a86f50d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -998,7 +998,6 @@ struct drm_i915_gem_object {
>  	int pages_pin_count;
>  
>  	/* prime dma-buf support */
> -	struct sg_table *sg_table;
>  	void *dma_buf_vmapping;
>  	int vmapping_count;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 06589a9..58075e3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1692,7 +1692,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>  {
>  	const struct drm_i915_gem_object_ops *ops = obj->ops;
>  
> -	if (obj->sg_table || obj->pages == NULL)
> +	if (obj->pages == NULL)
>  		return 0;
>  
>  	BUG_ON(obj->gtt_space);
> @@ -1838,7 +1838,7 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  	const struct drm_i915_gem_object_ops *ops = obj->ops;
>  	int ret;
>  
> -	if (obj->sg_table || obj->pages)
> +	if (obj->pages)
>  		return 0;
>  
>  	BUG_ON(obj->pages_pin_count);
> @@ -3731,9 +3731,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  
>  	trace_i915_gem_object_destroy(obj);
>  
> -	if (gem_obj->import_attach)
> -		drm_prime_gem_destroy(gem_obj, obj->sg_table);
> -
>  	if (obj->phys_obj)
>  		i915_gem_detach_phys_object(dev, obj);
>  
> @@ -3755,6 +3752,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  
>  	BUG_ON(obj->pages);
>  
> +	if (obj->base.import_attach)
> +		drm_prime_gem_destroy(&obj->base, NULL);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  

Was the reordering of the destroy sequence intentional?

> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 4bb1b94..ca3497e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -82,7 +82,8 @@ out:
>  }
>  
>  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> -			    struct sg_table *sg, enum dma_data_direction dir)
> +				   struct sg_table *sg,
> +				   enum dma_data_direction dir)
>  {
>  	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
>  	sg_free_table(sg);

I thought we frown upon unnecessary whitespace fixes in patches which
have behavioral changes?

> @@ -228,11 +229,35 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>  	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
>  }
>  
> +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
> +{
> +	struct sg_table *sg;
> +
> +	sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL);
> +	if (IS_ERR(sg))
> +		return PTR_ERR(sg);
> +
> +	obj->pages = sg;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
> +{
> +	dma_buf_unmap_attachment(obj->base.import_attach,
> +				 obj->pages, DMA_BIDIRECTIONAL);
> +	obj->has_dma_mapping = false;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
> +	.get_pages = i915_gem_object_get_pages_dmabuf,
> +	.put_pages = i915_gem_object_put_pages_dmabuf,
> +};
> +
>  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  					     struct dma_buf *dma_buf)
>  {
>  	struct dma_buf_attachment *attach;
> -	struct sg_table *sg;
>  	struct drm_i915_gem_object *obj;
>  	int ret;
>  
> @@ -251,34 +276,25 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
> -	if (IS_ERR(sg)) {
> -		ret = PTR_ERR(sg);
> -		goto fail_detach;
> -	}
>  
>  	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
> -		goto fail_unmap;
> +		goto fail_detach;
>  	}
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
>  		kfree(obj);
> -		goto fail_unmap;
> +		goto fail_detach;
>  	}
>  
> -	obj->has_dma_mapping = true;
> -	obj->sg_table = sg;
> +	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
>  	obj->base.import_attach = attach;
>  
>  	return &obj->base;
>  
> -fail_unmap:
> -	dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
>  fail_detach:
>  	dma_buf_detach(dma_buf, attach);
>  	return ERR_PTR(ret);
>  }
> -
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 6746109..c86dc59 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -234,7 +234,7 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
>  	}
>  
>  	i915_ppgtt_insert_sg_entries(ppgtt,
> -				     obj->sg_table ?: obj->pages,
> +				     obj->pages,
>  				     obj->gtt_space->start >> PAGE_SHIFT,
>  				     pte_flags);
>  }
> @@ -325,7 +325,7 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
>  
> -	intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
> +	intel_gtt_insert_sg_entries(obj->pages,
>  				    obj->gtt_space->start >> PAGE_SHIFT,
>  				    agp_type);
>  	obj->has_global_gtt_mapping = 1;



-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops
  2012-09-14 18:02   ` Ben Widawsky
@ 2012-09-14 18:24     ` Chris Wilson
  0 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-09-14 18:24 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Fri, 14 Sep 2012 11:02:02 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Tue,  4 Sep 2012 21:02:58 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > @@ -3731,9 +3731,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  
> >  	trace_i915_gem_object_destroy(obj);
> >  
> > -	if (gem_obj->import_attach)
> > -		drm_prime_gem_destroy(gem_obj, obj->sg_table);
> > -
> >  	if (obj->phys_obj)
> >  		i915_gem_detach_phys_object(dev, obj);
> >  
> > @@ -3755,6 +3752,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  
> >  	BUG_ON(obj->pages);
> >  
> > +	if (obj->base.import_attach)
> > +		drm_prime_gem_destroy(&obj->base, NULL);
> > +
> >  	drm_gem_object_release(&obj->base);
> >  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
> >  
> 
> Was the reordering of the destroy sequence intentional?

Yes. ;)

> > diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > index 4bb1b94..ca3497e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > @@ -82,7 +82,8 @@ out:
> >  }
> >  
> >  static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> > -			    struct sg_table *sg, enum dma_data_direction dir)
> > +				   struct sg_table *sg,
> > +				   enum dma_data_direction dir)
> >  {
> >  	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
> >  	sg_free_table(sg);
> 
> I thought we frown upon unnecessary whitespace fixes in patches which
> have behavioral changes?

Call it a leftover from the time I spent moving much of the common code
to drm_prime.c
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops
  2012-09-04 20:02 ` [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops Chris Wilson
  2012-09-14 18:02   ` Ben Widawsky
@ 2012-09-14 21:43   ` Daniel Vetter
  1 sibling, 0 replies; 55+ messages in thread
From: Daniel Vetter @ 2012-09-14 21:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Sep 04, 2012 at 09:02:58PM +0100, Chris Wilson wrote:
> By providing a callback for when we need to bind the pages, and then
> release them again later, we can shorten the amount of time we hold the
> foreign pages mapped and pinned, and importantly the dmabuf objects then
> behave as any other normal object with respect to the shrinker and
> memory management.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops
  2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
  2012-09-06 22:32   ` Ben Widawsky
@ 2012-10-11 18:28   ` Jesse Barnes
  1 sibling, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:53 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> In order to specialise functions depending upon the type of object, we
> can attach vfuncs to each object via a new ->ops pointer.
> 
> For instance, this will be used in future patches to only bind pages from
> a dma-buf for the duration that the object is used by the GPU - and so
> prevent them from pinning those pages for the entire lifetime of the object.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |   12 +++++-
>  drivers/gpu/drm/i915/i915_gem.c        |   71 +++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    4 +-
>  3 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f16ab5e..f180874 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -898,9 +898,16 @@ enum i915_cache_level {
>  	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
>  };
>  
> +struct drm_i915_gem_object_ops {
> +	int (*get_pages)(struct drm_i915_gem_object *);
> +	void (*put_pages)(struct drm_i915_gem_object *);
> +};
> +
>  struct drm_i915_gem_object {
>  	struct drm_gem_object base;
>  
> +	const struct drm_i915_gem_object_ops *ops;
> +
>  	/** Current space allocated to this object in the GTT, if any. */
>  	struct drm_mm_node *gtt_space;
>  	struct list_head gtt_list;
> @@ -1305,7 +1312,8 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
>  int i915_gem_init_object(struct drm_gem_object *obj);
> -void i915_gem_object_init(struct drm_i915_gem_object *obj);
> +void i915_gem_object_init(struct drm_i915_gem_object *obj,
> +			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> @@ -1318,7 +1326,7 @@ int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>  void i915_gem_lastclose(struct drm_device *dev);
>  
> -int __must_check i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj);
> +int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  			 struct intel_ring_buffer *to);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 87a64e5..66fbd9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1650,18 +1650,12 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
>  	return obj->madv == I915_MADV_DONTNEED;
>  }
>  
> -static int
> +static void
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	int page_count = obj->base.size / PAGE_SIZE;
>  	int ret, i;
>  
> -	BUG_ON(obj->gtt_space);
> -
> -	if (obj->pages == NULL)
> -		return 0;
> -
> -	BUG_ON(obj->gtt_space);
>  	BUG_ON(obj->madv == __I915_MADV_PURGED);
>  
>  	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> @@ -1693,9 +1687,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>  
>  	drm_free_large(obj->pages);
>  	obj->pages = NULL;
> +}
>  
> -	list_del(&obj->gtt_list);
> +static int
> +i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +
> +	if (obj->sg_table || obj->pages == NULL)
> +		return 0;
> +
> +	BUG_ON(obj->gtt_space);
>  
> +	ops->put_pages(obj);
> +
> +	list_del(&obj->gtt_list);
>  	if (i915_gem_object_is_purgeable(obj))
>  		i915_gem_object_truncate(obj);
>  
> @@ -1712,7 +1718,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
>  				 &dev_priv->mm.unbound_list,
>  				 gtt_list) {
>  		if (i915_gem_object_is_purgeable(obj) &&
> -		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +		    i915_gem_object_put_pages(obj) == 0) {
>  			count += obj->base.size >> PAGE_SHIFT;
>  			if (count >= target)
>  				return count;
> @@ -1724,7 +1730,7 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
>  				 mm_list) {
>  		if (i915_gem_object_is_purgeable(obj) &&
>  		    i915_gem_object_unbind(obj) == 0 &&
> -		    i915_gem_object_put_pages_gtt(obj) == 0) {
> +		    i915_gem_object_put_pages(obj) == 0) {
>  			count += obj->base.size >> PAGE_SHIFT;
>  			if (count >= target)
>  				return count;
> @@ -1742,10 +1748,10 @@ i915_gem_shrink_all(struct drm_i915_private *dev_priv)
>  	i915_gem_evict_everything(dev_priv->dev);
>  
>  	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
> -		i915_gem_object_put_pages_gtt(obj);
> +		i915_gem_object_put_pages(obj);
>  }
>  
> -int
> +static int
>  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  {
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> @@ -1754,9 +1760,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	struct page *page;
>  	gfp_t gfp;
>  
> -	if (obj->pages || obj->sg_table)
> -		return 0;
> -
>  	/* Assert that the object is not currently in any GPU domain. As it
>  	 * wasn't in the GTT, there shouldn't be any way it could have been in
>  	 * a GPU cache
> @@ -1806,7 +1809,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_do_bit_17_swizzle(obj);
>  
> -	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
>  	return 0;
>  
>  err_pages:
> @@ -1818,6 +1820,24 @@ err_pages:
>  	return PTR_ERR(page);
>  }
>  
> +int
> +i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +	int ret;
> +
> +	if (obj->sg_table || obj->pages)
> +		return 0;
> +
> +	ret = ops->get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
> +	return 0;
> +}
> +
>  void
>  i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
>  			       struct intel_ring_buffer *ring,
> @@ -2071,7 +2091,6 @@ void i915_gem_reset(struct drm_device *dev)
>  		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
>  	}
>  
> -
>  	/* The fence registers are invalidated so clear them out */
>  	i915_gem_reset_fences(dev);
>  }
> @@ -2871,7 +2890,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  		return -E2BIG;
>  	}
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret)
>  		return ret;
>  
> @@ -3610,15 +3629,16 @@ unlock:
>  	return ret;
>  }
>  
> -void i915_gem_object_init(struct drm_i915_gem_object *obj)
> +void i915_gem_object_init(struct drm_i915_gem_object *obj,
> +			  const struct drm_i915_gem_object_ops *ops)
>  {
> -	obj->base.driver_private = NULL;
> -
>  	INIT_LIST_HEAD(&obj->mm_list);
>  	INIT_LIST_HEAD(&obj->gtt_list);
>  	INIT_LIST_HEAD(&obj->ring_list);
>  	INIT_LIST_HEAD(&obj->exec_list);
>  
> +	obj->ops = ops;
> +
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
>  	/* Avoid an unnecessary call to unbind on the first bind. */
> @@ -3627,6 +3647,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj)
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
>  
> +static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
> +	.get_pages = i915_gem_object_get_pages_gtt,
> +	.put_pages = i915_gem_object_put_pages_gtt,
> +};
> +
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size)
>  {
> @@ -3653,7 +3678,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	mapping_set_gfp_mask(mapping, mask);
>  
> -	i915_gem_object_init(obj);
> +	i915_gem_object_init(obj, &i915_gem_object_ops);
>  
>  	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>  	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> @@ -3711,7 +3736,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		dev_priv->mm.interruptible = was_interruptible;
>  	}
>  
> -	i915_gem_object_put_pages_gtt(obj);
> +	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  
>  	drm_gem_object_release(&obj->base);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 43c9530..e4f1141 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -41,7 +41,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
>  		sg = ERR_PTR(ret);
>  		goto out;
> @@ -89,7 +89,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
>  		goto out_unlock;
>  	}
>  
> -	ret = i915_gem_object_get_pages_gtt(obj);
> +	ret = i915_gem_object_get_pages(obj);
>  	if (ret) {
>  		mutex_unlock(&dev->struct_mutex);
>  		return ERR_PTR(ret);

Ben's comments are good ones; some kdoc would be nice.  Anyway:

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap
  2012-09-04 20:02 ` [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap Chris Wilson
  2012-09-06 22:55   ` Ben Widawsky
@ 2012-10-11 18:30   ` Jesse Barnes
  1 sibling, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:54 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> We need to refcount our pages in order to prevent reaping them at
> inopportune times, such as when they currently vmapped or exported to
> another driver. However, we also wish to keep the lazy deallocation of
> our pages so we need to take a pin/unpinned approach rather than a
> simple refcount.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---

Why do we need the pages pinned if the object is vmapped?  Shouldn't we
only pin if a vmapped object is currently being used by the GPU or
mapped through the GTT?  Or is that what you meant?
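
For reference, the pinning in question is just a nesting count on the
backing pages, roughly (a sketch of the helpers as introduced earlier in
the series):

	static inline void
	i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
	{
		/* Pages must already be attached before pinning them */
		BUG_ON(obj->pages == NULL);
		obj->pages_pin_count++;
	}

	static inline void
	i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
	{
		BUG_ON(obj->pages_pin_count == 0);
		obj->pages_pin_count--;
	}

so a vmap would hold a pin until the corresponding vunmap.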

Assuming that's correct:
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 03/24] drm/i915: Pin backing pages for pwrite
  2012-09-04 20:02 ` [PATCH 03/24] drm/i915: Pin backing pages for pwrite Chris Wilson
  2012-09-07  0:07   ` Ben Widawsky
@ 2012-10-11 18:31   ` Jesse Barnes
  1 sibling, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:02:55 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> By using the recently introduced pinning of pages, we can safely drop
> the mutex in the knowledge that the pages are not going to disappear
> beneath us, and so we can simplify the code for iterating over the pages.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   37 +++++++++++++------------------------
>  1 file changed, 13 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index aa088ef..8a4eac0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -690,7 +690,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  				       page_length);
>  	kunmap_atomic(vaddr);
>  
> -	return ret;
> +	return ret ? -EFAULT : 0;
>  }
>  
>  /* Only difference to the fast-path function is that this can handle bit17
> @@ -724,7 +724,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>  					     page_do_bit17_swizzling);
>  	kunmap(page);
>  
> -	return ret;
> +	return ret ? -EFAULT : 0;
>  }
>  
>  static int
> @@ -733,7 +733,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  		      struct drm_i915_gem_pwrite *args,
>  		      struct drm_file *file)
>  {
> -	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
>  	ssize_t remain;
>  	loff_t offset;
>  	char __user *user_data;
> @@ -742,7 +741,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	int hit_slowpath = 0;
>  	int needs_clflush_after = 0;
>  	int needs_clflush_before = 0;
> -	int release_page;
>  
>  	user_data = (char __user *) (uintptr_t) args->data_ptr;
>  	remain = args->size;
> @@ -768,6 +766,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	    && obj->cache_level == I915_CACHE_NONE)
>  		needs_clflush_before = 1;
>  
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	i915_gem_object_pin_pages(obj);
> +
>  	offset = args->offset;
>  	obj->dirty = 1;
>  
> @@ -793,18 +797,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			((shmem_page_offset | page_length)
>  				& (boot_cpu_data.x86_clflush_size - 1));
>  
> -		if (obj->pages) {
> -			page = obj->pages[offset >> PAGE_SHIFT];
> -			release_page = 0;
> -		} else {
> -			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
> -			if (IS_ERR(page)) {
> -				ret = PTR_ERR(page);
> -				goto out;
> -			}
> -			release_page = 1;
> -		}
> -
> +		page = obj->pages[offset >> PAGE_SHIFT];
>  		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
>  			(page_to_phys(page) & (1 << 17)) != 0;
>  
> @@ -816,26 +809,20 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  			goto next_page;
>  
>  		hit_slowpath = 1;
> -		page_cache_get(page);
>  		mutex_unlock(&dev->struct_mutex);
> -
>  		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
>  					user_data, page_do_bit17_swizzling,
>  					partial_cacheline_write,
>  					needs_clflush_after);
>  
>  		mutex_lock(&dev->struct_mutex);
> -		page_cache_release(page);
> +
>  next_page:
>  		set_page_dirty(page);
>  		mark_page_accessed(page);
> -		if (release_page)
> -			page_cache_release(page);
>  
> -		if (ret) {
> -			ret = -EFAULT;
> +		if (ret)
>  			goto out;
> -		}
>  
>  		remain -= page_length;
>  		user_data += page_length;
> @@ -843,6 +830,8 @@ next_page:
>  	}
>  
>  out:
> +	i915_gem_object_unpin_pages(obj);
> +
>  	if (hit_slowpath) {
>  		/* Fixup: Kill any reinstated backing storage pages */
>  		if (obj->madv == __I915_MADV_PURGED)

I'll leave the pread/pwrite reviewing to Daniel...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-09-04 20:03 ` [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
@ 2012-10-11 18:43   ` Jesse Barnes
  2012-10-11 19:06     ` Jesse Barnes
  0 siblings, 1 reply; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:01 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> A few of the earlier registers were enlarged and so the Base Data of
> Stolen Memory Register (BDSM) was pushed to 0xb0.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index a01ff74..a528e4a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -63,7 +63,11 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
>  	 * its value of TOLUD.
>  	 */
>  	base = 0;
> -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> +	if (INTEL_INFO(dev)->gen >= 6) {
> +		/* Read Base Data of Stolen Memory Register (BDSM) directly */
> +		pci_read_config_dword(pdev, 0xB0, &base);
> +		base &= ~4095; /* lower bits used for locking register */
> +	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
>  		/* Read Graphics Base of Stolen Memory directly */
>  		pci_read_config_dword(pdev, 0xA4, &base);
>  #if 0
> @@ -172,6 +176,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  	if (dev_priv->mm.stolen_base == 0)
>  		return 0;
>  
> +	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
> +		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
> +
>  	/* Basic memrange allocator for stolen space */
>  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
>  

A bit scary we've had this around so long?  And now I can't find docs
for this...  Assuming it tests out ok though:

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT
  2012-09-04 20:03 ` [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
@ 2012-10-11 18:45   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:02 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |    2 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.c |   35 ++++++++++++++++++++++++++++++++---
>  2 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f614c26..533361e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -899,6 +899,8 @@ enum i915_cache_level {
>  	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
>  };
>  
> +#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
> +
>  struct drm_i915_gem_object_ops {
>  	int (*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c86dc59..d1b4cc8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -378,18 +378,47 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
>  			      unsigned long end)
>  {
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_mm_node *entry;
> +	struct drm_i915_gem_object *obj;
>  
> -	/* Substract the guard page ... */
> +	/* Subtract the guard page ... */
>  	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
>  	if (!HAS_LLC(dev))
>  		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
>  
> +	/* Mark any preallocated objects as occupied */
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
> +		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
> +			      obj->gtt_offset, obj->base.size);
> +
> +		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
> +		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
> +						     obj->gtt_offset,
> +						     obj->base.size,
> +						     false);
> +		obj->has_global_gtt_mapping = 1;
> +	}
> +
>  	dev_priv->mm.gtt_start = start;
>  	dev_priv->mm.gtt_mappable_end = mappable_end;
>  	dev_priv->mm.gtt_end = end;
>  	dev_priv->mm.gtt_total = end - start;
>  	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
>  
> -	/* ... but ensure that we clear the entire range. */
> -	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
> +	/* Clear any non-preallocated blocks */
> +	list_for_each_entry(entry, &dev_priv->mm.gtt_space.hole_stack, hole_stack) {
> +		unsigned long hole_start = entry->start + entry->size;
> +		unsigned long hole_end = list_entry(entry->node_list.next,
> +						    struct drm_mm_node,
> +						    node_list)->start;
> +
> +		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> +			      hole_start, hole_end);
> +
> +		intel_gtt_clear_range(hole_start / PAGE_SIZE,
> +				      (hole_end-hole_start) / PAGE_SIZE);
> +	}
> +
> +	/* And finally clear the reserved guard page */
> +	intel_gtt_clear_range(end / PAGE_SIZE - 1, 1);
>  }

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC
  2012-09-04 20:03 ` [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
@ 2012-10-11 18:49   ` Jesse Barnes
  2012-10-11 18:56     ` Chris Wilson
  0 siblings, 1 reply; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:04 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> As we may wish to wrap regions preallocated by the BIOS, we need to do
> that before carving out contiguous chunks of stolen space for FBC.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    1 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c |  110 ++++++++++++++++----------------
>  drivers/gpu/drm/i915/intel_display.c   |    3 +
>  3 files changed, 59 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 533361e..31d3a9f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1491,6 +1491,7 @@ int i915_gem_evict_everything(struct drm_device *dev);
>  
>  /* i915_gem_stolen.c */
>  int i915_gem_init_stolen(struct drm_device *dev);
> +int i915_gem_stolen_setup_compression(struct drm_device *dev);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
>  
>  /* i915_gem_tiling.c */
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index a528e4a..17119d7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -86,21 +86,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
>  	return base;
>  }
>  
> -static void i915_warn_stolen(struct drm_device *dev)
> -{
> -	DRM_INFO("not enough stolen space for compressed buffer, disabling\n");
> -	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
> -}
> -
> -static void i915_setup_compression(struct drm_device *dev, int size)
> +static int i915_setup_compression(struct drm_device *dev, int size)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
> -	unsigned long cfb_base;
> -	unsigned long ll_base = 0;
>  
> -	/* Just in case the BIOS is doing something questionable. */
> -	intel_disable_fbc(dev);
> +	DRM_DEBUG_KMS("reserving %d bytes of contiguous stolen space for FBC\n",
> +		      size);
>  
>  	compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
>  	if (compressed_fb)
> @@ -108,11 +100,11 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  	if (!compressed_fb)
>  		goto err;
>  
> -	cfb_base = dev_priv->mm.stolen_base + compressed_fb->start;
> -	if (!cfb_base)
> -		goto err_fb;
> -
> -	if (!(IS_GM45(dev) || HAS_PCH_SPLIT(dev))) {
> +	if (HAS_PCH_SPLIT(dev))
> +		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
> +	else if (IS_GM45(dev)) {
> +		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
> +	} else {
>  		compressed_llb = drm_mm_search_free(&dev_priv->mm.stolen,
>  						    4096, 4096, 0);
>  		if (compressed_llb)
> @@ -121,56 +113,78 @@ static void i915_setup_compression(struct drm_device *dev, int size)
>  		if (!compressed_llb)
>  			goto err_fb;
>  
> -		ll_base = dev_priv->mm.stolen_base + compressed_llb->start;
> -		if (!ll_base)
> -			goto err_llb;
> -	}
> +		dev_priv->compressed_llb = compressed_llb;
>  
> -	dev_priv->cfb_size = size;
> +		I915_WRITE(FBC_CFB_BASE,
> +			   dev_priv->mm.stolen_base + compressed_fb->start);
> +		I915_WRITE(FBC_LL_BASE,
> +			   dev_priv->mm.stolen_base + compressed_llb->start);
> +	}
>  
>  	dev_priv->compressed_fb = compressed_fb;
> -	if (HAS_PCH_SPLIT(dev))
> -		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
> -	else if (IS_GM45(dev)) {
> -		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
> -	} else {
> -		I915_WRITE(FBC_CFB_BASE, cfb_base);
> -		I915_WRITE(FBC_LL_BASE, ll_base);
> -		dev_priv->compressed_llb = compressed_llb;
> -	}
> +	dev_priv->cfb_size = size;
>  
> -	DRM_DEBUG_KMS("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n",
> -		      (long)cfb_base, (long)ll_base, size >> 20);
> -	return;
> +	return size;
>  
> -err_llb:
> -	drm_mm_put_block(compressed_llb);
>  err_fb:
>  	drm_mm_put_block(compressed_fb);
>  err:
>  	dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
> -	i915_warn_stolen(dev);
> +	DRM_INFO("not enough stolen space for compressed buffer (need %d bytes), disabling\n", size);
> +	DRM_INFO("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
> +	return 0;
> +}
> +
> +int i915_gem_stolen_setup_compression(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node *node;
> +	unsigned long hole_start, hole_end, size;
> +
> +	if (dev_priv->mm.stolen_base == 0)
> +		return 0;
> +
> +	if (dev_priv->cfb_size)
> +		return dev_priv->cfb_size;
> +
> +	/* Try to set up FBC with a reasonable compressed buffer size */
> +	size = 0;
> +	drm_mm_for_each_hole(node, &dev_priv->mm.stolen, hole_start, hole_end) {
> +		unsigned long hole_size = hole_end - hole_start;
> +		if (hole_size > size)
> +			size = hole_size;
> +	}
> +
> +	/* Try to get a 32M buffer... */
> +	if (size > (36*1024*1024))
> +		size = 32*1024*1024;
> +	else /* fall back to 3/4 of the stolen space */
> +		size = size * 3 / 4;
> +
> +	return i915_setup_compression(dev, size);
>  }
>  
>  static void i915_cleanup_compression(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	drm_mm_put_block(dev_priv->compressed_fb);
> +	if (dev_priv->compressed_fb)
> +		drm_mm_put_block(dev_priv->compressed_fb);
> +
>  	if (dev_priv->compressed_llb)
>  		drm_mm_put_block(dev_priv->compressed_llb);
> +
> +	dev_priv->cfb_size = 0;
>  }
>  
>  void i915_gem_cleanup_stolen(struct drm_device *dev)
>  {
> -	if (I915_HAS_FBC(dev) && i915_powersave)
> -		i915_cleanup_compression(dev);
> +	i915_cleanup_compression(dev);
>  }
>  
>  int i915_gem_init_stolen(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	unsigned long prealloc_size = dev_priv->mm.gtt->stolen_size;
>  
>  	dev_priv->mm.stolen_base = i915_stolen_to_physical(dev);
>  	if (dev_priv->mm.stolen_base == 0)
> @@ -180,21 +194,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
>  
>  	/* Basic memrange allocator for stolen space */
> -	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
> -
> -	/* Try to set up FBC with a reasonable compressed buffer size */
> -	if (I915_HAS_FBC(dev) && i915_powersave) {
> -		int cfb_size;
> -
> -		/* Leave 1M for line length buffer & misc. */
> -
> -		/* Try to get a 32M buffer... */
> -		if (prealloc_size > (36*1024*1024))
> -			cfb_size = 32*1024*1024;
> -		else /* fall back to 7/8 of the stolen space */
> -			cfb_size = prealloc_size * 7 / 8;
> -		i915_setup_compression(dev, cfb_size);
> -	}
> +	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->mm.gtt->stolen_size);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 778cbb8..221d035 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7231,6 +7231,9 @@ void intel_modeset_init(struct drm_device *dev)
>  	/* Just disable it once at startup */
>  	i915_disable_vga(dev);
>  	intel_setup_outputs(dev);
> +
> +	/* Just in case the BIOS is doing something questionable. */
> +	intel_disable_fbc(dev);
>  }
>  
>  void intel_modeset_gem_init(struct drm_device *dev)

Assuming you actually call this function in a later patch:

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects
  2012-09-04 20:03 ` [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects Chris Wilson
@ 2012-10-11 18:50   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:07 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Stolen objects also share the property that they have no backing shmemfs
> filp, but they can be used with pwrite/pread/gtt-mapping.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |    5 +++++
>  drivers/gpu/drm/i915/i915_gem.c |    4 ++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 637babb..cc3cc4f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1049,6 +1049,11 @@ struct drm_i915_gem_object {
>  	atomic_t pending_flip;
>  };
>  
> +inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
> +{
> +	return obj->base.import_attach != NULL;
> +}
> +
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 58075e3..f1cef1f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -553,7 +553,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>  	/* prime objects have no backing filp to GEM pread/pwrite
>  	 * pages from.
>  	 */
> -	if (!obj->base.filp) {
> +	if (i915_gem_object_is_prime(obj)) {
>  		ret = -EINVAL;
>  		goto out;
>  	}
> @@ -902,7 +902,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	/* prime objects have no backing filp to GEM pread/pwrite
>  	 * pages from.
>  	 */
> -	if (!obj->base.filp) {
> +	if (i915_gem_object_is_prime(obj)) {
>  		ret = -EINVAL;
>  		goto out;
>  	}

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 16/24] drm/i915: Support readback of stolen objects upon error
  2012-09-04 20:03 ` [PATCH 16/24] drm/i915: Support readback of stolen objects upon error Chris Wilson
@ 2012-10-11 18:51   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:08 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_irq.c |    8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index dd49046..fe3f60c 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -923,6 +923,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
>  						     reloc_offset);
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
> +		} else if (src->stolen) {
> +			unsigned long offset;
> +
> +			offset = dev_priv->mm.stolen_base;
> +			offset += src->stolen->start;
> +			offset += i << PAGE_SHIFT;
> +
> +			memcpy_fromio(d, (void *)offset, PAGE_SIZE);
>  		} else {
>  			struct page *page;
>  			void *s;

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen()
  2012-09-04 20:03 ` [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
@ 2012-10-11 18:53   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:11 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Allow for the creation of GEM objects backed by stolen memory. As these
> are not backed by ordinary pages, we create a fake dma mapping and store
> the address in the scatterlist rather than obj->pages.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |    3 +
>  drivers/gpu/drm/i915/i915_gem.c        |    1 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c |  122 ++++++++++++++++++++++++++++++++
>  3 files changed, 126 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cc3cc4f..f19a4f2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1500,6 +1500,9 @@ int i915_gem_evict_everything(struct drm_device *dev);
>  int i915_gem_init_stolen(struct drm_device *dev);
>  int i915_gem_stolen_setup_compression(struct drm_device *dev);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
> +struct drm_i915_gem_object *
> +i915_gem_object_create_stolen(struct drm_device *dev, u32 size);
> +void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
>  
>  /* i915_gem_tiling.c */
>  void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 070ddf2..2c04ea4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3847,6 +3847,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	obj->pages_pin_count = 0;
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
> +	i915_gem_object_release_stolen(obj);
>  
>  	BUG_ON(obj->pages);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 17119d7..d91f6eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -198,3 +198,125 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  
>  	return 0;
>  }
> +
> +static struct sg_table *
> +i915_pages_create_for_stolen(struct drm_device *dev,
> +			     u32 offset, u32 size)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +
> +	/* We hide that we have no struct page backing our stolen object
> +	 * by wrapping the contiguous physical allocation with a fake
> +	 * dma mapping in a single scatterlist.
> +	 */
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return NULL;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return NULL;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = offset;
> +	sg->length = size;
> +
> +	sg_dma_address(sg) = dev_priv->mm.stolen_base + offset;
> +	sg_dma_len(sg) = size;
> +
> +	return st;
> +}
> +
> +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
> +{
> +	BUG();
> +	return -EINVAL;
> +}
> +
> +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
> +{
> +	/* Should only be called during free */
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
> +	.get_pages = i915_gem_object_get_pages_stolen,
> +	.put_pages = i915_gem_object_put_pages_stolen,
> +};
> +
> +struct drm_i915_gem_object *
> +_i915_gem_object_create_stolen(struct drm_device *dev,
> +			       struct drm_mm_node *stolen)
> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	if (obj == NULL)
> +		return NULL;
> +
> +	if (drm_gem_private_object_init(dev, &obj->base, stolen->size))
> +		goto cleanup;
> +
> +	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
> +
> +	obj->pages = i915_pages_create_for_stolen(dev,
> +						  stolen->start, stolen->size);
> +	if (obj->pages == NULL)
> +		goto cleanup;
> +
> +	obj->has_dma_mapping = true;
> +	obj->pages_pin_count = 1;
> +	obj->stolen = stolen;
> +
> +	obj->base.write_domain = I915_GEM_DOMAIN_GTT;
> +	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
> +	obj->cache_level = I915_CACHE_NONE;
> +
> +	return obj;
> +
> +cleanup:
> +	kfree(obj);
> +	return NULL;
> +}
> +
> +struct drm_i915_gem_object *
> +i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj;
> +	struct drm_mm_node *stolen;
> +
> +	if (dev_priv->mm.stolen_base == 0)
> +		return NULL;
> +
> +	DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
> +	if (size == 0)
> +		return NULL;
> +
> +	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
> +	if (stolen)
> +		stolen = drm_mm_get_block(stolen, size, 4096);
> +	if (stolen == NULL)
> +		return NULL;
> +
> +	obj = _i915_gem_object_create_stolen(dev, stolen);
> +	if (obj)
> +		return obj;
> +
> +	drm_mm_put_block(stolen);
> +	return NULL;
> +}
> +
> +void
> +i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
> +{
> +	if (obj->stolen) {
> +		drm_mm_put_block(obj->stolen);
> +		obj->stolen = NULL;
> +	}
> +}

Can _i915_gem_object_create_stolen be static?

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory
  2012-09-04 20:03 ` [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory Chris Wilson
@ 2012-10-11 18:54   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:12 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_fb.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
> index 97f6735..9de9cd9 100644
> --- a/drivers/gpu/drm/i915/intel_fb.c
> +++ b/drivers/gpu/drm/i915/intel_fb.c
> @@ -84,7 +84,9 @@ static int intelfb_create(struct intel_fbdev *ifbdev,
>  
>  	size = mode_cmd.pitches[0] * mode_cmd.height;
>  	size = ALIGN(size, PAGE_SIZE);
> -	obj = i915_gem_alloc_object(dev, size);
> +	obj = i915_gem_object_create_stolen(dev, size);
> +	if (obj == NULL)
> +		obj = i915_gem_alloc_object(dev, size);
>  	if (!obj) {
>  		DRM_ERROR("failed to allocate framebuffer\n");
>  		ret = -ENOMEM;

Just for fun?  Sounds good, may as well put it to use somehow.

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 21/24] drm/i915: Allocate ringbuffers from stolen memory
  2012-09-04 20:03 ` [PATCH 21/24] drm/i915: Allocate ringbuffers " Chris Wilson
@ 2012-10-11 18:54   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:13 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c |    6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 984a0c5..577a96a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1096,7 +1096,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  			return ret;
>  	}
>  
> -	obj = i915_gem_alloc_object(dev, ring->size);
> +	obj = NULL;
> +	if (!HAS_LLC(dev))
> +		obj = i915_gem_object_create_stolen(dev, ring->size);
> +	if (obj == NULL)
> +		obj = i915_gem_alloc_object(dev, ring->size);
>  	if (obj == NULL) {
>  		DRM_ERROR("Failed to allocate ringbuffer\n");
>  		ret = -ENOMEM;

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 22/24] drm/i915: Allocate overlay registers from stolen memory
  2012-09-04 20:03 ` [PATCH 22/24] drm/i915: Allocate overlay registers " Chris Wilson
@ 2012-10-11 18:55   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:14 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_overlay.c |    6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index afd0f30..2fa20a4 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1368,8 +1368,10 @@ void intel_setup_overlay(struct drm_device *dev)
>  
>  	overlay->dev = dev;
>  
> -	reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
> -	if (!reg_bo)
> +	reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
> +	if (reg_bo == NULL)
> +		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
> +	if (reg_bo == NULL)
>  		goto out_free;
>  	overlay->reg_bo = reg_bo;
>  

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 23/24] drm/i915: Use a slab for object allocation
  2012-09-04 20:03 ` [PATCH 23/24] drm/i915: Use a slab for object allocation Chris Wilson
@ 2012-10-11 18:55   ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 18:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue,  4 Sep 2012 21:03:15 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out,
> the corruption was being caused elsewhere, and i915.ko, as a major user
> of many objects, was being hit hardest.
> 
> Indeed, as we are frequent users of the generic kmalloc caches,
> dedicating one to ourselves (or at least naming one for us, depending
> upon the slab core) aids debugging of our own slab usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 2c09900..f2e3439 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1760,6 +1760,9 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	destroy_workqueue(dev_priv->wq);
>  
> +	if (dev_priv->slab)
> +		kmem_cache_destroy(dev_priv->slab);
> +
>  	pci_dev_put(dev_priv->bridge_dev);
>  	kfree(dev->dev_private);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f19a4f2..ec8c0fc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -391,6 +391,7 @@ struct intel_gmbus {
>  
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
> +	struct kmem_cache *slab;
>  
>  	const struct intel_device_info *info;
>  
> @@ -1316,12 +1317,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2c04ea4..a32d3eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>  		struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>  	if (ret) {
>  		drm_gem_object_release(&obj->base);
>  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return ret;
>  	}
>  
> @@ -3770,12 +3782,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	struct address_space *mapping;
>  	u32 mask;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
>  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return NULL;
>  	}
>  
> @@ -3858,7 +3870,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
>  	kfree(obj->bit_17);
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  }
>  
>  int
> @@ -4236,8 +4248,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -	int i;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int i;
> +
> +	dev_priv->slab =
> +		kmem_cache_create("i915_gem_object",
> +				  sizeof(struct drm_i915_gem_object), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		goto fail_detach;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index d91f6eb..fc9228a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -255,7 +255,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>  	struct drm_i915_gem_object *obj;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
> @@ -280,7 +280,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	return obj;
>  
>  cleanup:
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  	return NULL;
>  }
>  

Nice.

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC
  2012-10-11 18:49   ` Jesse Barnes
@ 2012-10-11 18:56     ` Chris Wilson
  0 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-10-11 18:56 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Thu, 11 Oct 2012 11:49:36 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Tue,  4 Sep 2012 21:03:04 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 778cbb8..221d035 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -7231,6 +7231,9 @@ void intel_modeset_init(struct drm_device *dev)
> >  	/* Just disable it once at startup */
> >  	i915_disable_vga(dev);
> >  	intel_setup_outputs(dev);
> > +
> > +	/* Just in case the BIOS is doing something questionable. */
> > +	intel_disable_fbc(dev);
> >  }
> >  
> >  void intel_modeset_gem_init(struct drm_device *dev)
> 
> Assuming you actually call this function in a later patch:

Yikes, you are right. This was one patch but Daniel complained that the
double tweaking was non-obvious, so I tried to split the reordering of
intel_enable_fbc() and the delayed allocation of cfb. I failed to keep
the code working in between these two patches. :(
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+
  2012-10-11 18:43   ` Jesse Barnes
@ 2012-10-11 19:06     ` Jesse Barnes
  0 siblings, 0 replies; 55+ messages in thread
From: Jesse Barnes @ 2012-10-11 19:06 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Thu, 11 Oct 2012 11:43:53 -0700
Jesse Barnes <jbarnes@virtuousgeek.org> wrote:

> On Tue,  4 Sep 2012 21:03:01 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > A few of the earlier registers were enlarged, and so the Base Data of
> > Stolen Memory Register (BDSM) was pushed to 0xb0.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_stolen.c |    9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > index a01ff74..a528e4a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > @@ -63,7 +63,11 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
> >  	 * its value of TOLUD.
> >  	 */
> >  	base = 0;
> > -	if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> > +	if (INTEL_INFO(dev)->gen >= 6) {
> > +		/* Read Base Data of Stolen Memory Register (BDSM) directly */
> > +		pci_read_config_dword(pdev, 0xB0, &base);
> > +		base &= ~4095; /* lower bits used for locking register */
> > +	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
> >  		/* Read Graphics Base of Stolen Memory directly */
> >  		pci_read_config_dword(pdev, 0xA4, &base);
> >  #if 0
> > @@ -172,6 +176,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
> >  	if (dev_priv->mm.stolen_base == 0)
> >  		return 0;
> >  
> > +	DRM_DEBUG_KMS("found %d bytes of stolen memory at %08lx\n",
> > +		      dev_priv->mm.gtt->stolen_size, dev_priv->mm.stolen_base);
> > +
> >  	/* Basic memrange allocator for stolen space */
> >  	drm_mm_init(&dev_priv->mm.stolen, 0, prealloc_size);
> >  
> 
> A bit scary we've had this around so long?  And now I can't find docs
> for this...  Assuming it tests out ok though:
> 
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

Ok finally found this in the device 0 docs.  0xb0 is correct.  However,
I think we could use the MCHBAR mirror of this too at 0x1080c0 or the
device 2 config space register 0x5c instead if we wanted.
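
A rough, untested sketch of the device 2 variant, in case we ever want
it (the 0x5c offset comes from the docs mentioned above, so treat it as
an assumption until it's double-checked against hardware):

	u32 bdsm;

	/* Device 2 is the IGD itself, so read our own config space
	 * rather than poking at the host bridge. */
	pci_read_config_dword(dev->pdev, 0x5c, &bdsm);
	base = bdsm & ~4095; /* lower bits used for locking register */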

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [PATCH 16/24] drm/i915: Support readback of stolen objects upon error
  2012-08-30 15:30 Next iteration of stolen support Chris Wilson
@ 2012-08-30 15:31 ` Chris Wilson
  0 siblings, 0 replies; 55+ messages in thread
From: Chris Wilson @ 2012-08-30 15:31 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f437398..7324850 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -894,6 +894,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 						     reloc_offset);
 			memcpy_fromio(d, s, PAGE_SIZE);
 			io_mapping_unmap_atomic(s);
+		} else if (src->stolen) {
+			unsigned long offset;
+
+			offset = dev_priv->mm.stolen_base;
+			offset += src->stolen->start;
+			offset += i << PAGE_SHIFT;
+
+			memcpy_fromio(d, (void __iomem *)offset, PAGE_SIZE);
 		} else {
 			struct page *page;
 			void *s;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2012-10-11 19:05 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-04 20:02 Stolen memory, again Chris Wilson
2012-09-04 20:02 ` [PATCH 01/24] drm/i915: Introduce drm_i915_gem_object_ops Chris Wilson
2012-09-06 22:32   ` Ben Widawsky
2012-10-11 18:28   ` Jesse Barnes
2012-09-04 20:02 ` [PATCH 02/24] drm/i915: Pin backing pages whilst exporting through a dmabuf vmap Chris Wilson
2012-09-06 22:55   ` Ben Widawsky
2012-10-11 18:30   ` Jesse Barnes
2012-09-04 20:02 ` [PATCH 03/24] drm/i915: Pin backing pages for pwrite Chris Wilson
2012-09-07  0:07   ` Ben Widawsky
2012-09-12 13:13     ` Daniel Vetter
2012-09-12 13:20       ` Daniel Vetter
2012-10-11 18:31   ` Jesse Barnes
2012-09-04 20:02 ` [PATCH 04/24] drm/i915: Pin backing pages for pread Chris Wilson
2012-09-07  0:10   ` Ben Widawsky
2012-09-04 20:02 ` [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist Chris Wilson
2012-09-07  1:49   ` Ben Widawsky
2012-09-10 16:34     ` Chris Wilson
2012-09-12 13:33       ` Daniel Vetter
2012-09-04 20:02 ` [PATCH 06/24] drm/i915: Convert the dmabuf object to use the new i915_gem_object_ops Chris Wilson
2012-09-14 18:02   ` Ben Widawsky
2012-09-14 18:24     ` Chris Wilson
2012-09-14 21:43   ` Daniel Vetter
2012-09-04 20:02 ` [PATCH 07/24] drm: Introduce drm_mm_create_block() Chris Wilson
2012-09-12 13:43   ` Daniel Vetter
2012-09-04 20:03 ` [PATCH 08/24] drm/i915: Fix detection of stolen base for gen2 Chris Wilson
2012-09-04 20:03 ` [PATCH 09/24] drm/i915: Fix location of stolen memory register for SandyBridge+ Chris Wilson
2012-10-11 18:43   ` Jesse Barnes
2012-10-11 19:06     ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 10/24] drm/i915: Avoid clearing preallocated regions from the GTT Chris Wilson
2012-10-11 18:45   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 11/24] drm: Introduce an iterator over holes in the drm_mm range manager Chris Wilson
2012-09-12 13:54   ` Daniel Vetter
2012-09-04 20:03 ` [PATCH 12/24] drm/i915: Delay allocation of stolen space for FBC Chris Wilson
2012-10-11 18:49   ` Jesse Barnes
2012-10-11 18:56     ` Chris Wilson
2012-09-04 20:03 ` [PATCH 13/24] drm/i915: Defer allocation of stolen memory for FBC until first use Chris Wilson
2012-09-04 20:03 ` [PATCH 14/24] drm/i915: Allow objects to be created with no backing pages, but stolen space Chris Wilson
2012-09-04 20:03 ` [PATCH 15/24] drm/i915: Differentiate between prime and stolen objects Chris Wilson
2012-10-11 18:50   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 16/24] drm/i915: Support readback of stolen objects upon error Chris Wilson
2012-10-11 18:51   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 17/24] drm/i915: Handle stolen objects in pwrite Chris Wilson
2012-09-04 20:03 ` [PATCH 18/24] drm/i915: Handle stolen objects for pread Chris Wilson
2012-09-04 20:03 ` [PATCH 19/24] drm/i915: Introduce i915_gem_object_create_stolen() Chris Wilson
2012-10-11 18:53   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 20/24] drm/i915: Allocate fbcon from stolen memory Chris Wilson
2012-10-11 18:54   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 21/24] drm/i915: Allocate ringbuffers " Chris Wilson
2012-10-11 18:54   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 22/24] drm/i915: Allocate overlay registers " Chris Wilson
2012-10-11 18:55   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 23/24] drm/i915: Use a slab for object allocation Chris Wilson
2012-10-11 18:55   ` Jesse Barnes
2012-09-04 20:03 ` [PATCH 24/24] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2012-08-30 15:30 Next iteration of stolen support Chris Wilson
2012-08-30 15:31 ` [PATCH 16/24] drm/i915: Support readback of stolen objects upon error Chris Wilson
